def generate_kornia_transforms(image_size=224, resize=256, mean=None, std=None, include_jitter=False):
    """Build the Kornia training and validation augmentation pipelines.

    Args:
        image_size: Side length of the square crop produced by both pipelines.
        resize: Side length the input is resized to before any cropping.
        mean: Per-channel normalization mean (sequence or tensor). ``None`` or
            an empty sequence selects the fallback ``[0.5, 0.5, 0.5]``.
        std: Per-channel normalization std (sequence or tensor). ``None`` or
            an empty sequence selects the fallback ``[0.1, 0.1, 0.1]``.
        include_jitter: When true, a ``ColorJitter`` stage is inserted right
            after the resize in the training pipeline.

    Returns:
        dict: ``{"train": nn.Sequential, "val": nn.Sequential}``. The
        pipelines and the normalization statistics are moved to CUDA when
        ``torch.cuda.is_available()``.
    """
    def _as_stat(values, fallback):
        # Decide on emptiness via ``numel`` rather than truthiness, so that
        # multi-element tensors/arrays do not raise "ambiguous truth value".
        if values is None:
            return torch.tensor(fallback)
        stat = torch.as_tensor(values)
        return stat if stat.numel() > 0 else torch.tensor(fallback)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    mean = _as_stat(mean, [0.5, 0.5, 0.5]).to(device)
    std = _as_stat(std, [0.1, 0.1, 0.1]).to(device)

    train_transforms = [G.Resize((resize, resize))]
    if include_jitter:
        train_transforms.append(
            K.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1))
    train_transforms.extend([
        K.RandomHorizontalFlip(p=0.5),
        K.RandomVerticalFlip(p=0.5),
        K.RandomRotation(90),
        K.RandomResizedCrop((image_size, image_size)),
        K.Normalize(mean, std),
    ])

    val_transforms = [
        G.Resize((resize, resize)),
        K.CenterCrop((image_size, image_size)),
        K.Normalize(mean, std),
    ]

    return {
        "train": nn.Sequential(*train_transforms).to(device),
        "val": nn.Sequential(*val_transforms).to(device),
    }
def stitch(x, M_matrices, M_rotations, M_flip, label=True):
    """Warp six camera views, fuse them on one canvas and normalize the result.

    Each view is perspective-warped and then affine-rotated to 219x306, pasted
    onto a black canvas twice that size (overlaps are resolved with an
    element-wise max), center-cropped to 438, warped with ``M_flip`` and
    finally color-normalized.

    Args:
        x: Batched multi-view images indexed as ``x[:, view]``; the code
            requires 6 views of 3 channels each.
            # assumes layout (batch, 6, 3, H, W) - TODO confirm H/W with caller
        M_matrices: Indexable of per-view perspective matrices; each entry is
            broadcast over the batch with ``unsqueeze(0).repeat``.
        M_rotations: Indexable of per-view affine matrices, used the same way.
        M_flip: Affine matrix repeated over the batch for the final warp.
            # presumably shaped (1, 2, 3) - verify against caller
        label: Selects which of the two hard-coded mean/std sets is used for
            the final normalization.

    Returns:
        The normalized, stitched image batch produced by ``K.Normalize``.
    """
    n_views = 6
    h, w = 219, 306
    margin = 55  # inset of the four corner views from the canvas border
    batch = len(x)

    views = []
    for i in range(n_views):
        # All images of the *same* view across the batch.
        img_batch = x[:, i, :, :, :]
        img_warp = kornia.warp_perspective(
            img_batch, M_matrices[i].unsqueeze(0).repeat(batch, 1, 1), dsize=(h, w))
        img_rotated = kornia.warp_affine(
            img_warp, M_rotations[i].unsqueeze(0).repeat(batch, 1, 1), dsize=(h, w))
        views.append(img_rotated)
    data = torch.stack(views, dim=0)  # (6, batch, 3, h, w)

    # Black canvas, twice the width/height of a single view. It is created on
    # the device of the warped views: unconditionally calling ``.cuda()`` broke
    # CPU inputs on CUDA machines and inputs living on a non-default GPU.
    agg = torch.zeros((batch, 3, 2 * h, 2 * w), device=data.device)

    # Two bases: front and back view, horizontally centered.
    agg[:, :, 0:h, (w - w // 2):(w + w // 2)] = data[1]
    agg[:, :, h:, (w - w // 2):(w + w // 2)] = data[4]

    # Corner views are merged with an element-wise max against what is there.
    corners = (
        (0, slice(margin, h + margin), slice(margin, w + margin)),        # top left
        (2, slice(margin, h + margin), slice(w - margin, -margin)),       # top right
        (3, slice(h - margin, -margin), slice(margin, w + margin)),       # bottom left
        (5, slice(h - margin, -margin), slice(w - margin, -margin)),      # bottom right
    )
    for view_idx, rows, cols in corners:
        agg[:, :, rows, cols] = torch.max(data[view_idx], agg[:, :, rows, cols])

    # Center-crop.
    crop_fn = kornia.augmentation.CenterCrop(size=438)
    agg = crop_fn(agg)

    # Final affine warp with the caller-supplied ``M_flip``.
    agg = kornia.warp_affine(agg, M_flip.repeat(batch, 1, 1), dsize=(438, 438))

    # Normalize color; statistics are created on the image's device.
    if label:
        mean, std = [0.698, 0.718, 0.730], [0.322, 0.313, 0.308]
    else:
        mean, std = [0.548, 0.597, 0.630], [0.339, 0.340, 0.342]
    normalize = K.Normalize(torch.tensor(mean, device=agg.device),
                            torch.tensor(std, device=agg.device))
    return normalize(agg)
def __init__(self, img_size: int):
    """Create the preprocessing pipeline: square resize, then normalization.

    Args:
        img_size: Side length of the square the input is resized to.
    """
    super().__init__()
    target_hw = (img_size, img_size)
    # A plain resize is used (instead of a random resized crop) so that the
    # whole image stays visible.
    stages = [
        Resize(target_hw),
        augmentation.Normalize(Tensor(DATASET_IMAGE_MEAN), Tensor(DATASET_IMAGE_STD)),
    ]
    self.preprocess = nn.Sequential(*stages)
def __init__(self, net, layer_name_list=None, image_size=32, projection_size=256, projection_hidden_size=4096,
             augment_fn=None, moving_average_decay=0.99, device_='cuda', number_of_classes=10,
             mean_data=torch.tensor([0.485, 0.456, 0.406]), std_data=torch.tensor([0.229, 0.224, 0.225])):
    """Wrap ``net`` with an online encoder, an EMA updater and a predictor.

    Args:
        net: Backbone handed to ``NetWrapper``.
        layer_name_list: Layers forwarded to ``NetWrapper``; ``None`` selects
            ``['avgpool']``. A fresh list is created per instance so that
            instances never share (and cannot corrupt) one default list.
        image_size: Side length of the random-resized crop and of the mock
            input used for the warm-up forward pass.
        projection_size: Output size passed to ``NetWrapper`` and ``MLP``.
        projection_hidden_size: Hidden size passed to ``NetWrapper`` and ``MLP``.
        augment_fn: Optional augmentation; the default flip/crop/normalize
            pipeline is used when it is ``None`` (via ``default``).
        moving_average_decay: Decay passed to ``EMA``.
        device_: Device the encoder, predictor and mock input are moved to.
        number_of_classes: Not used in this constructor.
        mean_data: Mean handed to ``Normalize`` in the default augmentation.
        std_data: Std handed to ``Normalize`` in the default augmentation.
    """
    super().__init__()
    if layer_name_list is None:
        layer_name_list = ['avgpool']

    DEFAULT_AUG = nn.Sequential(
        augs.RandomHorizontalFlip(),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=mean_data, std=std_data),
    )
    self.augment = default(augment_fn, DEFAULT_AUG)
    self.device = device_

    self.online_encoder = NetWrapper(
        net, projection_size, projection_hidden_size,
        layer_name_list=layer_name_list).to(self.device)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)
    self.online_predictor = MLP(
        projection_size, projection_size, projection_hidden_size).to(self.device)

    # Send a mock image tensor to instantiate singleton parameters.
    self.forward(torch.randn(2, 3, image_size, image_size).to(self.device))
def default_train_transforms():
    """Return the default training transforms, keyed by pipeline stage.

    The Kornia variant (all transforms operate on tensors) is used when Kornia
    is available and the ``FLASH_TESTING`` environment variable is not ``"1"``;
    otherwise the torchvision variant is returned.
    """
    image_size = ImageClassificationData.image_size
    flash_testing = os.getenv("FLASH_TESTING", "0") == "1"

    if not _KORNIA_AVAILABLE or flash_testing:
        from torchvision import transforms as T  # noqa F811

        geometric = nn.Sequential(T.RandomResizedCrop(image_size), T.RandomHorizontalFlip())
        return {
            "pre_tensor_transform": geometric,
            "to_tensor_transform": torchvision.transforms.ToTensor(),
            "post_tensor_transform": T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        }

    # Better approach, as all transforms are applied on tensors directly.
    to_tensor = torchvision.transforms.ToTensor()
    geometric = nn.Sequential(K.RandomResizedCrop(image_size), K.RandomHorizontalFlip())
    normalize = nn.Sequential(
        K.Normalize(torch.tensor([0.485, 0.456, 0.406]), torch.tensor([0.229, 0.224, 0.225])),
    )
    return {
        "to_tensor_transform": to_tensor,
        "post_tensor_transform": geometric,
        "per_batch_transform_on_device": normalize,
    }
def __init__(self, opt):
    """Wrap a dataset and assemble the augmentation pipeline from ``opt``.

    Keys read from ``opt``: ``dataset``, ``crop_size``, ``normalize``
    (required) and ``key1``, ``key2``, ``for_sr`` (optional, via ``opt_get``
    with defaults ``'hq'``, ``'lq'`` and ``False``).
    """
    super().__init__()
    self.wrapped_dataset = create_dataset(opt['dataset'])
    self.cropped_img_size = opt['crop_size']
    self.key1 = opt_get(opt, ['key1'], 'hq')
    self.key2 = opt_get(opt, ['key2'], 'lq')
    # When set, color alterations and blurs are disabled.
    for_sr = opt_get(opt, ['for_sr'], False)

    crop_hw = (self.cropped_img_size, self.cropped_img_size)
    stages = [augs.RandomHorizontalFlip(), augs.RandomResizedCrop(crop_hw)]
    if not for_sr:
        stages += [
            RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
            augs.RandomGrayscale(p=0.2),
            RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        ]
    if opt['normalize']:
        # The paper calls for normalization. Most datasets/models in this repo
        # don't use this. Recommend setting true if you want to train exactly
        # like the paper.
        imagenet_mean = torch.tensor([0.485, 0.456, 0.406])
        imagenet_std = torch.tensor([0.229, 0.224, 0.225])
        stages.append(augs.Normalize(mean=imagenet_mean, std=imagenet_std))
    self.aug = nn.Sequential(*stages)
def default_transforms(self) -> Dict[str, Callable]:
    """Return the default transforms for the ``"video"`` entry of a sample.

    In training mode the clip is randomly short-side scaled, cropped and
    flipped; otherwise it is only short-side scaled. Both modes subsample 8
    frames first and normalize per batch on device.
    """
    if self.training:
        spatial_steps = [
            RandomShortSideScale(min_size=256, max_size=320),
            RandomCrop(244),
            RandomHorizontalFlip(p=0.5),
        ]
    else:
        spatial_steps = [ShortSideScale(256)]

    clip_pipeline = Compose([UniformTemporalSubsample(8)] + spatial_steps)
    post_tensor = Compose([ApplyTransformToKey(key="video", transform=clip_pipeline)])

    normalize = K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225]))
    batch_pipeline = K.VideoSequential(normalize, data_format="BCTHW", same_on_frame=False)
    on_device = Compose([ApplyTransformToKey(key="video", transform=batch_pipeline)])

    return {
        "post_tensor_transform": post_tensor,
        "per_batch_transform_on_device": on_device,
    }
def __init__(self, mean, std, scale=(0.9, 1.1), max_degrees=0) -> None:
    """Set up a nearest-neighbour random affine and a normalization step.

    Args:
        mean: Mean forwarded to ``k.Normalize``.
        std: Std forwarded to ``k.Normalize``.
        scale: Scale range forwarded to ``k.RandomAffine``.
        max_degrees: Rotation bound forwarded to ``k.RandomAffine``; also
            stored on the instance.
    """
    super(Transform, self).__init__()
    self.max_degrees = max_degrees
    affine_options = {"resample": k.Resample.NEAREST, "scale": scale}
    self.aff = k.RandomAffine(max_degrees, **affine_options)
    self.norm = k.Normalize(mean, std)
def per_batch_transform_on_device(self) -> Callable:
    """Return the on-device batch transform: normalization of the ``"video"`` key.

    The normalization uses ``self.mean`` / ``self.std`` and is wrapped in a
    ``K.VideoSequential`` configured from ``self.data_format`` and
    ``self.same_on_frame``.
    """
    normalize = K.Normalize(self.mean, self.std)
    video_pipeline = K.VideoSequential(
        normalize,
        data_format=self.data_format,
        same_on_frame=self.same_on_frame,
    )
    return ApplyToKeys("video", video_pipeline)
def __init__(self, im_size=224, device=torch.device('cuda:0')):
    """Build the augmentation pipeline and its normalization statistics.

    Args:
        im_size: Side length of the random crop; the input is first resized
            to ``int(im_size * 1.2)``.
        device: Device the mean/std tensors are moved to.
    """
    super().__init__()
    self.mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
    self.std = torch.tensor([0.229, 0.224, 0.225]).to(device)

    enlarged = int(im_size * 1.2)
    stages = [
        kornia.geometry.transform.Resize(enlarged),
        Kaug.RandomCrop((im_size, im_size), padding=8),
        Kaug.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        Kaug.RandomHorizontalFlip(),
        Kaug.Normalize(mean=self.mean, std=self.std),
    ]
    self.aug = torch.nn.Sequential(*stages)
def default_aug(image_size: Tuple[int, int] = (360, 360)) -> nn.Module:
    """Return the default augmentation pipeline.

    Stages, in order: mild color jitter, vertical flip, horizontal flip,
    occasional Gaussian blur, random resized crop to ``image_size`` and
    normalization with fixed per-channel statistics.
    """
    channel_mean = torch.tensor([0.485, 0.456, 0.406])
    channel_std = torch.tensor([0.229, 0.224, 0.225])
    blur = filters.GaussianBlur2d((3, 3), (0.5, 0.5))

    stages = [
        aug.ColorJitter(contrast=0.1, brightness=0.1, saturation=0.1, p=0.8),
        aug.RandomVerticalFlip(),
        aug.RandomHorizontalFlip(),
        RandomApply(blur, p=0.1),
        aug.RandomResizedCrop(size=image_size, scale=(0.5, 1)),
        aug.Normalize(mean=channel_mean, std=channel_std),
    ]
    return nn.Sequential(*stages)
def __init__(
    self,
    net,
    image_size,
    hidden_layer = -2,
    projection_size = 256,
    projection_hidden_size = 2048,
    augment_fn = None,
    augment_fn2 = None,
    moving_average_decay = 0.99,
    ppm_num_layers = 1,
    ppm_gamma = 2,
    distance_thres = 0.1,  # the paper uses 0.7, but that leads to nearly all positive hits. need clarification on how the coordinates are normalized before distance calculation.
    similarity_temperature = 0.3,
    alpha = 1.
):
    """Wrap ``net`` for pixel-level contrastive training.

    Creates the two augmentation pipelines (falling back to a SimCLR-style
    default), the online encoder, the EMA updater and the pixel propagation
    module, moves the wrapper to the device of ``net`` and runs one mock
    forward pass.
    """
    super().__init__()

    # Default SimCLR augmentation; always built, used only as a fallback.
    imagenet_mean = torch.tensor([0.485, 0.456, 0.406])
    imagenet_std = torch.tensor([0.229, 0.224, 0.225])
    simclr_stages = [
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    fallback_aug = nn.Sequential(*simclr_stages)

    self.augment1 = default(augment_fn, fallback_aug)
    self.augment2 = default(augment_fn2, self.augment1)

    self.online_encoder = NetWrapper(
        net, projection_size, projection_hidden_size, layer=hidden_layer)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.distance_thres = distance_thres
    self.similarity_temperature = similarity_temperature
    self.alpha = alpha

    self.propagate_pixels = PPM(
        chan=projection_size,
        num_layers=ppm_num_layers,
        gamma=ppm_gamma,
    )

    # Get device of network and make wrapper same device.
    net_device = get_module_device(net)
    self.to(net_device)

    # Send a mock image tensor to instantiate singleton parameters.
    mock_batch = torch.randn(2, 3, image_size, image_size, device=net_device)
    self.forward(mock_batch)
def default_augmentation(image_size: Tuple[int, int] = (224, 224)) -> nn.Module:
    """Return the default augmentation pipeline.

    Stages, in order: resize to ``image_size``, color jitter (p=0.8), random
    grayscale, horizontal flip, occasional Gaussian blur, random resized crop
    to ``image_size`` and normalization with fixed per-channel statistics.
    """
    channel_mean = torch.tensor([0.485, 0.456, 0.406])
    channel_std = torch.tensor([0.229, 0.224, 0.225])

    stages = [
        tf.Resize(size=image_size),
        RandomApply(aug.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        aug.RandomGrayscale(p=0.2),
        aug.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        aug.RandomResizedCrop(size=image_size),
        aug.Normalize(mean=channel_mean, std=channel_std),
    ]
    return nn.Sequential(*stages)
def __init__(self, net, image_size=32, layer_name_list=None, projection_size=256, projection_hidden_size=4096,
             augment_fn=None, moving_average_decay=0.99, device_='cuda', number_of_classes=10,
             mean_data=torch.tensor([0.485, 0.456, 0.406]), std_data=torch.tensor([0.229, 0.224, 0.225])):
    """Wrap ``net`` with an online encoder, an EMA updater and three predictors.

    Args:
        net: Backbone handed to ``NetWrapper``.
        image_size: Side length of the random-resized crop and of the mock
            input used for the warm-up forward pass.
        layer_name_list: Layers forwarded to ``NetWrapper``; ``None`` selects
            ``[-2]``. A fresh list is created per instance so that instances
            never share (and cannot corrupt) one default list.
        projection_size: Output size passed to ``NetWrapper`` and the ``MLP`` s.
        projection_hidden_size: Hidden size passed to ``NetWrapper`` and to
            ``online_predictor`` (the two extra predictors use 512).
        augment_fn: Optional augmentation; the default SimCLR-style pipeline
            is used when it is ``None`` (via ``default``).
        moving_average_decay: Decay passed to ``EMA``.
        device_: Device the encoder, predictors and mock input are moved to.
        number_of_classes: Not used in this constructor.
        mean_data: Mean handed to ``Normalize`` in the default augmentation.
        std_data: Std handed to ``Normalize`` in the default augmentation.
    """
    super().__init__()
    if layer_name_list is None:
        layer_name_list = [-2]

    # Default SimCLR augmentation.
    DEFAULT_AUG = nn.Sequential(
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=mean_data, std=std_data),
    )
    self.augment = default(augment_fn, DEFAULT_AUG)
    self.device = device_

    self.online_encoder = NetWrapper(
        net, projection_size, projection_hidden_size,
        layer_name_list=layer_name_list).to(self.device)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.online_predictor = MLP(
        projection_size, projection_size, projection_hidden_size).to(self.device)
    self.online_predictor1 = MLP(projection_size, projection_size, 512).to(self.device)
    self.online_predictor2 = MLP(projection_size, projection_size, 512).to(self.device)

    # Send a mock image tensor to instantiate singleton parameters.
    self.forward(torch.randn(2, 3, image_size, image_size).to(self.device))
def __init__(self,
             N_TFMS: int,
             MAGN: int,
             mean: Union[tuple, list, torch.Tensor],
             std: Union[tuple, list, torch.Tensor],
             transform_list: list = None,
             use_resize: int = None,
             image_size: tuple = None,
             use_mix: int = None,
             mix_p: float = .5):
    """Configure the augmentation policy.

    Args:
        N_TFMS: Stored as ``self.N_TFMS``.
        MAGN: Stored as ``self.MAGN``; also passed to ``kornia_list`` when no
            ``transform_list`` is given.
        mean: Normalization mean; non-tensors are converted to a tensor.
        std: Normalization std; non-tensors are converted to a tensor.
        transform_list: Transforms to use; defaults to ``kornia_list(MAGN)``.
        use_resize: ``None`` disables resizing. Otherwise ``image_size`` must
            have length 2 and, for values below 3, the value indexes
            ``[RandomResizedCrop, RandomCrop, CenterCrop]``.
        image_size: ``(h, w)``; required when ``use_resize`` or ``use_mix``
            is set.
        use_mix: When not ``None``, CutMix and MixUp are instantiated.
        mix_p: Stored as ``self.mix_p``.
    """
    super().__init__()
    self.N_TFMS, self.MAGN = N_TFMS, MAGN
    self.use_mix, self.mix_p = use_mix, mix_p
    self.image_size = image_size

    # ``torch.as_tensor`` instead of the legacy ``torch.Tensor(...)``
    # constructor: the latter interprets an int as a *size* and returns
    # uninitialized memory.
    float_dtype = torch.get_default_dtype()
    if not isinstance(mean, torch.Tensor):
        mean = torch.as_tensor(mean, dtype=float_dtype)
    if not isinstance(std, torch.Tensor):
        std = torch.as_tensor(std, dtype=float_dtype)

    if self.use_mix is not None:
        self.mix_list = [
            K.RandomCutMix(self.image_size[0], self.image_size[1], p=1),
            K.RandomMixUp(p=1),
        ]

    self.use_resize = use_resize
    if use_resize is not None:
        assert len(
            image_size
        ) == 2, 'Invalid `image_size`. Must be a tuple of form (h, w)'
        self.resize_list = [
            K.RandomResizedCrop(image_size),
            K.RandomCrop(image_size),
            K.CenterCrop(image_size),
        ]
        # NOTE(review): for use_resize >= 3 no ``self.resize`` is set here.
        if self.use_resize < 3:
            self.resize = self.resize_list[use_resize]

    self.normalize = K.Normalize(mean, std)

    self.transform_list = transform_list
    if transform_list is None:
        self.transform_list = kornia_list(MAGN)
def __init__(self,resize,image_size,mean,std,include_jitter=False,Set="train"):
    """Create paired image/mask transforms.

    Image transforms use the library defaults for interpolation; the mask
    counterparts use nearest-neighbour sampling.

    Args:
        resize: Side length of the square both image and mask are resized to.
        image_size: Side length of the square center crop.
        mean: Mean forwarded to ``K.Normalize``.
        std: Std forwarded to ``K.Normalize``.
        include_jitter: When true ``self.jit`` is a ``ColorJitter``, otherwise
            an identity function.
        Set: Stored as ``self.Set``.
    """
    super().__init__()
    resize_hw = (resize, resize)
    crop_hw = (image_size, image_size)

    def _affine_args():
        # Fresh argument lists per call so the two affines share no state.
        return [-90., 90.], [0., 0.15], None, [0., 0.15]

    self.resize = G.Resize(resize_hw, align_corners=False)
    self.mask_resize = lambda x: torch.nn.functional.interpolate(
        x, size=resize_hw, mode='nearest', align_corners=None)

    if include_jitter:
        self.jit = K.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)
    else:
        self.jit = lambda x: x

    self.affine = K.augmentation.RandomAffine(*_affine_args())
    self.affine_mask = K.augmentation.RandomAffine(
        *_affine_args(), resample="NEAREST", align_corners=False)
    self.normalize = K.Normalize(mean, std)
    self.crop = K.CenterCrop(crop_hw)
    self.mask_crop = K.CenterCrop(crop_hw, resample="NEAREST")
    self.Set = Set
def __init__(self, net, image_size, hidden_layer=-2, projection_size=256, projection_hidden_size=4096,
             augment_fn=None, augment_fn2=None, moving_average_decay=0.99):
    """Wrap ``net`` with an online encoder, an EMA updater and a predictor.

    Creates the two augmentation pipelines (falling back to a SimCLR-style
    default), moves the wrapper to the device of ``net`` and runs one mock
    forward pass.
    """
    super().__init__()

    # Default SimCLR augmentation; always built, used only as a fallback.
    imagenet_mean = torch.tensor([0.485, 0.456, 0.406])
    imagenet_std = torch.tensor([0.229, 0.224, 0.225])
    simclr_stages = [
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    fallback_aug = nn.Sequential(*simclr_stages)

    self.augment1 = default(augment_fn, fallback_aug)
    self.augment2 = default(augment_fn2, self.augment1)

    self.online_encoder = NetWrapper(
        net, projection_size, projection_hidden_size, layer=hidden_layer)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)
    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size)

    # Get device of network and make wrapper same device.
    net_device = get_module_device(net)
    self.to(net_device)

    # Send a mock image tensor to instantiate singleton parameters.
    mock_batch = torch.randn(2, 3, image_size, image_size, device=net_device)
    self.forward(mock_batch)
def __init__(self, brightness=(0.75, 1.25), contrast=(0.75, 1.25), saturation=(0., 2.),
             translate=(0.125, 0.125), normalized=True, mean=0.5, std=0.5, device=None):
    """Set up the color / translation / cutout augmentations.

    Args:
        brightness: Forwarded to ``ColorJitter``.
        contrast: Forwarded to ``ColorJitter``.
        saturation: Forwarded to ``ColorJitter``.
        translate: Forwarded to ``RandomAffine``.
        normalized: When true, ``self.normalize`` / ``self.denormalize`` are
            created from ``mean`` and ``std``; otherwise both are ``None``.
        mean: Normalization mean. If ``mean`` and ``std`` are both a tuple or
            list they are converted to tensors on ``device``.
        std: Normalization std (see ``mean``).
        device: Required when per-channel ``mean`` and ``std`` are given.

    Raises:
        Exception: If per-channel statistics are given without ``device``.
    """
    if not normalized:
        self.normalize, self.denormalize = None, None
    else:
        per_channel = isinstance(mean, (tuple, list)) and isinstance(std, (tuple, list))
        if per_channel:
            if not device:
                raise Exception(
                    'Please specify a torch.device() object when using mean and std for each channels'
                )
            mean = torch.Tensor(mean).to(device)
            std = torch.Tensor(std).to(device)
        self.normalize = aug.Normalize(mean, std)
        self.denormalize = aug.Denormalize(mean, std)

    # rand_brightness, rand_contrast, rand_saturation
    jitter_op = aug.ColorJitter(
        brightness=brightness, contrast=contrast, saturation=saturation, p=1.)
    # rand_translate
    translate_op = aug.RandomAffine(
        degrees=0, translate=translate, padding_mode=SamplePadding.BORDER, p=1.)
    # rand_cutout
    cutout_op = aug.RandomErasing(value=0.5, p=1.)

    self.augmentations = {
        'color': jitter_op,
        'translation': translate_op,
        'cutout': cutout_op,
    }
def get_augmenter(augmenter_type: str,
                  image_size: ImageSizeType,
                  dataset_mean: DatasetStatType,
                  dataset_std: DatasetStatType,
                  padding: PaddingInputType = 1. / 8.,
                  pad_if_needed: bool = False,
                  subset_size: int = 2) -> Union[Module, Callable]:
    """
    Build the augmentation pipeline selected by ``augmenter_type``.

    Args:
        augmenter_type: augmenter type; one of "simple", "fixed",
            "validation", "test" or "randaugment" (case-insensitive,
            surrounding whitespace ignored)
        image_size: (height, width) image size
        dataset_mean: dataset mean value in CHW
        dataset_std: dataset standard deviation in CHW
        padding: percent of image size to pad on each border of the image. A single number is applied to all four
            borders. If a sequence (tuple or list) of length 4 is provided, it is used to pad left, top, right,
            bottom borders respectively. If a sequence of length 2 is provided, it is used to pad left/right,
            top/bottom borders, respectively.
        pad_if_needed: bool flag for RandomCrop "pad_if_needed" option
        subset_size: number of augmentations used in subset

    Returns:
        nn.Module for Kornia augmentation or Callable for torchvision transform

    Raises:
        AssertionError: if ``padding`` is neither a number nor a sequence of length 2 or 4
        NotImplementedError: if ``augmenter_type`` is not supported
    """
    # Accept any tuple/list as the documented "sequence"; a scalar is
    # broadcast to all four borders.
    if isinstance(padding, (tuple, list)):
        padding = tuple(padding)
    else:
        assert isinstance(padding, (int, float))
        padding = (padding, padding, padding, padding)

    assert len(padding) == 2 or len(padding) == 4
    if len(padding) == 2:
        # padding of length 2 is used to pad left/right, top/bottom borders, respectively
        # padding of length 4 is used to pad left, top, right, bottom borders respectively
        padding = (padding[0], padding[1], padding[0], padding[1])

    # image_size is of shape (h,w); padding values is [left, top, right, bottom] borders
    height, width = image_size[0], image_size[1]
    padding = (int(width * padding[0]), int(height * padding[1]),
               int(width * padding[2]), int(height * padding[3]))

    def _normalize():
        # Every pipeline ends with the same dataset normalization.
        return K.Normalize(mean=torch.tensor(dataset_mean, dtype=torch.float32),
                           std=torch.tensor(dataset_std, dtype=torch.float32))

    def _padded_random_crop():
        return K.RandomCrop(size=image_size, padding=padding,
                            pad_if_needed=pad_if_needed, padding_mode='reflect')

    augmenter_type = augmenter_type.strip().lower()

    if augmenter_type == "simple":
        return nn.Sequential(
            _padded_random_crop(),
            K.RandomHorizontalFlip(p=0.5),
            _normalize(),
        )
    elif augmenter_type == "fixed":
        return nn.Sequential(
            K.RandomHorizontalFlip(p=0.5),
            K.RandomResizedCrop(size=image_size, scale=(0.8, 1.0), ratio=(1., 1.)),
            RandomAugmentation(p=0.5,
                               augmentation=F.GaussianBlur2d(kernel_size=(3, 3),
                                                             sigma=(1.5, 1.5),
                                                             border_type='constant')),
            K.ColorJitter(contrast=(0.75, 1.5)),
            # additive Gaussian noise
            K.RandomErasing(p=0.1),
            # Multiply
            K.RandomAffine(degrees=(-25., 25.), translate=(0.2, 0.2),
                           scale=(0.8, 1.2), shear=(-8., 8.)),
            _normalize(),
        )
    elif augmenter_type in ["validation", "test"]:
        return nn.Sequential(
            _normalize(),
        )
    elif augmenter_type == "randaugment":
        return nn.Sequential(
            _padded_random_crop(),
            K.RandomHorizontalFlip(p=0.5),
            RandAugmentNS(n=subset_size, m=10),
            _normalize(),
        )
    else:
        raise NotImplementedError(
            f"\"{augmenter_type}\" is not a supported augmenter type")
def test_video_classifier_finetune_fiftyone(tmpdir):
    """Fine-tune a ``VideoClassifier`` on a mocked FiftyOne video dataset.

    First checks the clip length produced by the default transforms, then
    runs a fast-dev fine-tuning pass with a custom training transform.
    """
    with mock_encoded_video_dataset_folder(tmpdir) as (dir_name, total_duration):
        half_duration = total_duration / 2 - 1e-9

        train_dataset = fo.Dataset.from_dir(
            dir_name,
            dataset_type=fo.types.VideoClassificationDirectoryTree,
        )

        # Arguments shared by both data modules built below.
        datamodule_kwargs = dict(
            train_dataset=train_dataset,
            clip_sampler="uniform",
            clip_duration=half_duration,
            video_sampler=SequentialSampler,
            decode_audio=False,
        )

        datamodule = VideoClassificationData.from_fiftyone(**datamodule_kwargs)

        expected_t_shape = 5
        for sample in datamodule.train_dataset.data:
            assert sample["video"].shape[1] == expected_t_shape

        assert len(VideoClassifier.available_backbones()) > 5

        clip_pipeline = Compose([
            UniformTemporalSubsample(8),
            RandomShortSideScale(min_size=256, max_size=320),
            RandomCrop(244),
            RandomHorizontalFlip(p=0.5),
        ])
        batch_pipeline = K.VideoSequential(
            K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])),
            K.augmentation.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
            data_format="BCTHW",
            same_on_frame=False
        )
        train_transform = {
            "post_tensor_transform": Compose([
                ApplyTransformToKey(key="video", transform=clip_pipeline),
            ]),
            "per_batch_transform_on_device": Compose([
                ApplyTransformToKey(key="video", transform=batch_pipeline),
            ]),
        }

        datamodule = VideoClassificationData.from_fiftyone(
            train_transform=train_transform, **datamodule_kwargs)

        model = VideoClassifier(num_classes=datamodule.num_classes, pretrained=False)
        trainer = flash.Trainer(fast_dev_run=True)
        trainer.finetune(model, datamodule=datamodule)
class TestVideoSequential:
    """Tests for ``K.VideoSequential`` over the ``BCTHW`` and ``BTCHW`` data formats."""

    @pytest.mark.parametrize('shape', [(3, 4), (2, 3, 4), (2, 3, 5, 6), (2, 3, 4, 5, 6, 7)])
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_exception(self, shape, data_format, device, dtype):
        """Inputs that are not 5-dimensional are expected to raise ``AssertionError``."""
        aug_list = K.VideoSequential(K.ColorJitter(0.1, 0.1, 0.1, 0.1), data_format=data_format, same_on_frame=True)
        with pytest.raises(AssertionError):
            img = torch.randn(*shape, device=device, dtype=dtype)
            aug_list(img)

    @pytest.mark.parametrize(
        'augmentation',
        [
            K.RandomAffine(360, p=1.0),
            K.CenterCrop((3, 3), p=1.0),
            K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
            K.RandomCrop((5, 5), p=1.0),
            K.RandomErasing(p=1.0),
            K.RandomGrayscale(p=1.0),
            K.RandomHorizontalFlip(p=1.0),
            K.RandomVerticalFlip(p=1.0),
            K.RandomPerspective(p=1.0),
            K.RandomResizedCrop((5, 5), p=1.0),
            K.RandomRotation(360.0, p=1.0),
            K.RandomSolarize(p=1.0),
            K.RandomPosterize(p=1.0),
            K.RandomSharpness(p=1.0),
            K.RandomEqualize(p=1.0),
            K.RandomMotionBlur(3, 35.0, 0.5, p=1.0),
            K.Normalize(torch.tensor([0.5, 0.5, 0.5]), torch.tensor([0.5, 0.5, 0.5]), p=1.0),
            K.Denormalize(torch.tensor([0.5, 0.5, 0.5]), torch.tensor([0.5, 0.5, 0.5]), p=1.0),
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_augmentation(self, augmentation, data_format, device, dtype):
        """Run ``reproducibility_test`` for each single augmentation wrapped in a ``VideoSequential``."""
        # One random clip, duplicated along the batch axis and scaled by 1/255.
        input = torch.randint(255, (1, 3, 3, 5, 6), device=device, dtype=dtype).repeat(2, 1, 1, 1, 1) / 255.0
        torch.manual_seed(21)
        aug_list = K.VideoSequential(augmentation, data_format=data_format, same_on_frame=True)
        reproducibility_test(input, aug_list)

    @pytest.mark.parametrize(
        'augmentations',
        [
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                K.RandomAffine(360, p=1.0)
            ],
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0)
            ],
            [K.RandomAffine(360, p=1.0), kornia.color.BgrToRgb()],
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=0.0),
                K.RandomAffine(360, p=0.0)
            ],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=0.0)],
            [K.RandomAffine(360, p=0.0)],
            [
                K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
                K.RandomAffine(360, p=1.0),
                K.RandomMixUp(p=1.0)
            ],
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    @pytest.mark.parametrize('random_apply', [1, (1, 1), (1, ), 10, True, False])
    def test_same_on_frame(self, augmentations, data_format, random_apply, device, dtype):
        """With ``same_on_frame=True``, identical input frames must yield identical output frames."""
        aug_list = K.VideoSequential(*augmentations,
                                     data_format=data_format,
                                     same_on_frame=True,
                                     random_apply=random_apply)

        if data_format == 'BCTHW':
            # A single frame repeated 4 times along the time axis (dim 2).
            input = torch.randn(2, 3, 1, 5, 6, device=device, dtype=dtype).repeat(1, 1, 4, 1, 1)
            output = aug_list(input)
            # When the pipeline returns labels, keep only the first element.
            if aug_list.return_label:
                output, _ = output
            assert (output[:, :, 0] == output[:, :, 1]).all()
            assert (output[:, :, 1] == output[:, :, 2]).all()
            assert (output[:, :, 2] == output[:, :, 3]).all()
        if data_format == 'BTCHW':
            # A single frame repeated 4 times along the time axis (dim 1).
            input = torch.randn(2, 1, 3, 5, 6, device=device, dtype=dtype).repeat(1, 4, 1, 1, 1)
            output = aug_list(input)
            if aug_list.return_label:
                output, _ = output
            assert (output[:, 0] == output[:, 1]).all()
            assert (output[:, 1] == output[:, 2]).all()
            assert (output[:, 2] == output[:, 3]).all()
        reproducibility_test(input, aug_list)

    @pytest.mark.parametrize(
        'augmentations',
        [
            [K.RandomAffine(360, p=1.0)],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0)],
            [
                K.RandomAffine(360, p=0.0),
                K.ImageSequential(K.RandomAffine(360, p=0.0))
            ],
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_against_sequential(self, augmentations, data_format, device, dtype):
        """With ``same_on_frame=False`` the output must equal ``nn.Sequential`` applied to the flattened frames."""
        aug_list_1 = K.VideoSequential(*augmentations, data_format=data_format, same_on_frame=False)
        aug_list_2 = torch.nn.Sequential(*augmentations)

        if data_format == 'BCTHW':
            input = torch.randn(2, 3, 1, 5, 6, device=device, dtype=dtype).repeat(1, 1, 4, 1, 1)
        if data_format == 'BTCHW':
            input = torch.randn(2, 1, 3, 5, 6, device=device, dtype=dtype).repeat(1, 4, 1, 1, 1)

        # Same seed before each pipeline so both draw the same random numbers.
        torch.manual_seed(0)
        output_1 = aug_list_1(input)

        torch.manual_seed(0)
        # Bring the input to (B, T, C, H, W) before folding T into the batch axis.
        if data_format == 'BCTHW':
            input = input.transpose(1, 2)
        output_2 = aug_list_2(input.reshape(-1, 3, 5, 6))
        output_2 = output_2.view(2, 4, 3, 5, 6)
        # Restore the original axis order for the comparison.
        if data_format == 'BCTHW':
            output_2 = output_2.transpose(1, 2)
        assert (output_1 == output_2).all(), dict(aug_list_1._params)

    @pytest.mark.jit
    @pytest.mark.skip(reason="turn off due to Union Type")
    def test_jit(self, device, dtype):
        """Compare eager and ``torch.jit.script`` outputs (currently skipped)."""
        B, C, D, H, W = 2, 3, 5, 4, 4
        img = torch.ones(B, C, D, H, W, device=device, dtype=dtype)
        op = K.VideoSequential(K.ColorJitter(0.1, 0.1, 0.1, 0.1), same_on_frame=True)
        op_jit = torch.jit.script(op)
        assert_close(op(img), op_jit(img))
def __init__(self,
             net,
             image_size,
             hidden_layer_pixel=-2,
             hidden_layer_instance=-2,
             projection_size=256,
             projection_hidden_size=2048,
             augment_fn=None,
             augment_fn2=None,
             prob_rand_hflip=0.25,
             moving_average_decay=0.99,
             ppm_num_layers=1,
             ppm_gamma=2,
             distance_thres=0.7,
             similarity_temperature=0.3,
             alpha=1.,
             use_pixpro=True,
             cutout_ratio_range=(0.6, 0.8),
             cutout_interpolate_mode='nearest',
             coord_cutout_interpolate_mode='bilinear'):
    """Wrap ``net`` for combined pixel-level and instance-level training.

    Creates the two augmentation pipelines (falling back to a built-in
    default), the online encoder, the EMA updater, the optional pixel
    propagation module (only when ``use_pixpro``) and the instance-level
    predictor, moves the wrapper to the device of ``net`` and runs one mock
    forward pass.
    """
    super().__init__()

    # Default augmentation; always built, used only as a fallback.
    imagenet_mean = torch.tensor([0.485, 0.456, 0.406])
    imagenet_std = torch.tensor([0.229, 0.224, 0.225])
    default_stages = [
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomSolarize(p=0.5),
        augs.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    fallback_aug = nn.Sequential(*default_stages)

    self.augment1 = default(augment_fn, fallback_aug)
    self.augment2 = default(augment_fn2, self.augment1)
    self.prob_rand_hflip = prob_rand_hflip

    self.online_encoder = NetWrapper(
        net=net,
        projection_size=projection_size,
        projection_hidden_size=projection_hidden_size,
        layer_pixel=hidden_layer_pixel,
        layer_instance=hidden_layer_instance)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.distance_thres = distance_thres
    self.similarity_temperature = similarity_temperature
    self.alpha = alpha

    self.use_pixpro = use_pixpro
    if use_pixpro:
        self.propagate_pixels = PPM(
            chan=projection_size, num_layers=ppm_num_layers, gamma=ppm_gamma)

    self.cutout_ratio_range = cutout_ratio_range
    self.cutout_interpolate_mode = cutout_interpolate_mode
    self.coord_cutout_interpolate_mode = coord_cutout_interpolate_mode

    # Instance level predictor.
    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size)

    # Get device of network and make wrapper same device.
    net_device = get_module_device(net)
    self.to(net_device)

    # Send a mock image tensor to instantiate singleton parameters.
    mock_batch = torch.randn(2, 3, image_size, image_size, device=net_device)
    self.forward(mock_batch)
if __name__ == '__main__': # 1. Download a video clip dataset. Find more dataset at https://pytorchvideo.readthedocs.io/en/latest/data.html download_data("https://pl-flash-data.s3.amazonaws.com/kinetics.zip") # 2. [Optional] Specify transforms to be used during training. # Flash helps you to place your transform exactly where you want. # Learn more at: # https://lightning-flash.readthedocs.io/en/latest/general/data.html#flash.core.data.process.Preprocess post_tensor_transform = [ UniformTemporalSubsample(8), RandomShortSideScale(min_size=256, max_size=320) ] per_batch_transform_on_device = [ K.Normalize(torch.tensor([0.45, 0.45, 0.45]), torch.tensor([0.225, 0.225, 0.225])) ] train_post_tensor_transform = post_tensor_transform + [ RandomCrop(244), RandomHorizontalFlip(p=0.5) ] val_post_tensor_transform = post_tensor_transform + [CenterCrop(244)] train_per_batch_transform_on_device = per_batch_transform_on_device def make_transform( post_tensor_transform: List[Callable] = post_tensor_transform, per_batch_transform_on_device: List[ Callable] = per_batch_transform_on_device): return { "post_tensor_transform": Compose([