def generate_kornia_transforms(image_size=224, resize=256, mean=None, std=None, include_jitter=False):
    """Build the train and validation kornia pipelines.

    Args:
        image_size: side length of the final square crop.
        resize: side length every image is resized to before cropping.
        mean: per-channel normalization mean. ``None`` or an empty sequence
            selects ``[0.5, 0.5, 0.5]``.
        std: per-channel normalization std. ``None`` or an empty sequence
            selects ``[0.1, 0.1, 0.1]``.
        include_jitter: when true, a ColorJitter stage is inserted right
            after the resize in the training pipeline.

    Returns:
        dict with keys ``"train"`` and ``"val"``, each an ``nn.Sequential``.
        Both pipelines (and the normalization tensors) are moved to CUDA
        when ``torch.cuda.is_available()``.
    """
    # The defaults are None rather than [] so no list object is shared
    # between calls; falsy values still fall back to the built-in statistics.
    mean = torch.tensor(mean) if mean else torch.tensor([0.5, 0.5, 0.5])
    std = torch.tensor(std) if std else torch.tensor([0.1, 0.1, 0.1])

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        mean = mean.cuda()
        std = std.cuda()

    train_transforms = [G.Resize((resize, resize))]
    if include_jitter:
        train_transforms.append(
            K.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1))
    train_transforms.extend([
        K.RandomHorizontalFlip(p=0.5),
        K.RandomVerticalFlip(p=0.5),
        K.RandomRotation(90),
        K.RandomResizedCrop((image_size, image_size)),
        K.Normalize(mean, std),
    ])

    val_transforms = [
        G.Resize((resize, resize)),
        K.CenterCrop((image_size, image_size)),
        K.Normalize(mean, std),
    ]

    transforms = dict(train=nn.Sequential(*train_transforms),
                      val=nn.Sequential(*val_transforms))
    if use_cuda:
        for k in transforms:
            transforms[k] = transforms[k].cuda()
    return transforms
def test_random_crops_and_flips(self, device, dtype):
    """Check xyxy boxes through a resampling RandomCrop plus a forced horizontal flip.

    The sampled crop source corners are overwritten with hand-picked
    locations so the expected boxes can be written down explicitly; the
    inverse pass is then compared against the untouched input boxes.
    """
    width, height = 100, 100
    crop_width, crop_height = 3, 3

    images = torch.randn(3, 3, width, height, device=device, dtype=dtype)
    single_set = torch.tensor(
        [[[1.0, 1.0, 2.0, 2.0], [0.0, 0.0, 1.0, 2.0], [0.0, 0.0, 2.0, 1.0]]],
        device=device,
        dtype=dtype,
    )
    bbox = single_set.expand(3, -1, -1)

    pipeline = K.AugmentationSequential(
        K.RandomCrop((crop_width, crop_height), padding=1, cropping_mode='resample', fill=0),
        K.RandomHorizontalFlip(p=1.0),
        data_keys=["input", "bbox_xyxy"],
    )
    reproducibility_test((images, bbox), pipeline)

    params = pipeline.forward_parameters(images.shape)
    sampled_src = params[0].data['src']

    # specifying the crop locations allows us to compute by hand the expected outputs
    crop_locations = torch.tensor(
        [[1.0, 2.0], [1.0, 1.0], [2.0, 0.0]],
        device=sampled_src.device,
        dtype=sampled_src.dtype,
    )
    corners = crop_locations.expand(4, -1, -1).permute(1, 0, 2).clone()
    corners[:, 1:3, 0] += crop_width - 1
    corners[:, 2:4, 1] += crop_height - 1
    params[0].data['src'] = corners

    # expected output bboxes after crop for specified crop locations and crop size (3,3)
    expected_out_bbox = torch.tensor(
        [
            [[1.0, 0.0, 2.0, 1.0], [0.0, -1.0, 1.0, 1.0], [0.0, -1.0, 2.0, 0.0]],
            [[1.0, 1.0, 2.0, 2.0], [0.0, 0.0, 1.0, 2.0], [0.0, 0.0, 2.0, 1.0]],
            [[0.0, 2.0, 1.0, 3.0], [-1.0, 1.0, 0.0, 3.0], [-1.0, 1.0, 1.0, 2.0]],
        ],
        device=device,
        dtype=dtype,
    )

    # horizontally flip boxes based on crop width
    left = expected_out_bbox[..., 0].clone()
    right = expected_out_bbox[..., 2].clone()
    expected_out_bbox[..., 0] = crop_width - right
    expected_out_bbox[..., 2] = crop_width - left

    out = pipeline(images, bbox, params=params)
    assert out[1].shape == bbox.shape
    assert_close(out[1], expected_out_bbox, atol=1e-4, rtol=1e-4)

    out_inv = pipeline.inverse(*out)
    assert out_inv[1].shape == bbox.shape
    assert_close(out_inv[1], bbox, atol=1e-4, rtol=1e-4)
def __init__(
    self,
    net,
    image_size,
    hidden_layer=-2,
    project_hidden=True,
    project_dim=128,
    augment_both=True,
    use_nt_xent_loss=False,
    augment_fn=None,
    use_bilinear=False,
    use_momentum=False,
    momentum_value=0.999,
    key_encoder=None,
    temperature=0.1,
    fp16=False,
):
    """Wrap ``net`` for contrastive training and run one mock forward pass.

    Args:
        net: network handed to ``OutputHiddenLayer``.
        image_size: side length of the square mock input and of the default
            random-resized crop.
        hidden_layer: forwarded to ``OutputHiddenLayer`` as ``layer``.
        project_hidden, project_dim, augment_both, use_nt_xent_loss,
        use_bilinear, use_momentum, key_encoder, temperature, fp16:
            stored unchanged on the instance.
        augment_fn: passed to ``default(...)`` with the built-in pipeline
            as the fallback.
        momentum_value: handed to ``EMA``.
    """
    super().__init__()
    # NOTE(review): the assignment target was missing in the source
    # ("= OutputHiddenLayer(...)" directly after the super call), which is a
    # syntax error. `self.net` is restored here -- confirm that forward()
    # reads the wrapped network under this attribute name.
    self.net = OutputHiddenLayer(net, layer=hidden_layer)

    DEFAULT_AUG = nn.Sequential(
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
    )
    self.augment = default(augment_fn, DEFAULT_AUG)

    self.augment_both = augment_both

    self.temperature = temperature
    self.use_nt_xent_loss = use_nt_xent_loss

    self.project_hidden = project_hidden
    self.projection = None
    self.project_dim = project_dim

    self.use_bilinear = use_bilinear
    self.bilinear_w = None

    self.use_momentum = use_momentum
    self.ema_updater = EMA(momentum_value)
    self.key_encoder = key_encoder

    # for accumulating queries and keys across calls
    self.queries = None
    self.keys = None

    self.fp16 = fp16

    # send a mock image tensor to instantiate parameters
    init = torch.randn(1, 3, image_size, image_size, device="cuda")
    if self.fp16:
        init = init.half()
    self.forward(init)
def __init__(self, opt):
    """Create the probabilistic crop/rotation stages configured by ``opt``.

    ``opt`` supplies ``input_height``, ``input_width``, ``random_crop``
    (used as crop padding), ``random_rotation`` and ``dataset``. A
    horizontal flip is only added when ``opt.dataset`` is ``"cifar10"``.
    """
    super(PostTensorTransform, self).__init__()

    crop_op = A.RandomCrop((opt.input_height, opt.input_width), padding=opt.random_crop)
    self.random_crop = ProbTransform(crop_op, p=0.8)

    rotation_op = A.RandomRotation(opt.random_rotation)
    self.random_rotation = ProbTransform(rotation_op, p=0.5)

    if opt.dataset == "cifar10":
        self.random_horizontal_flip = A.RandomHorizontalFlip(p=0.5)
def __init__(self, im_size=224, device=torch.device('cuda:0')):
    """Set up normalization statistics on ``device`` and the augmentation chain.

    The chain resizes to ``int(im_size * 1.2)``, takes a padded random crop
    of ``(im_size, im_size)``, applies color jitter and a random horizontal
    flip, and finally normalizes with the stored mean/std.
    """
    super().__init__()
    self.mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
    self.std = torch.tensor([0.229, 0.224, 0.225]).to(device)

    enlarged = int(im_size * 1.2)
    stages = [
        kornia.geometry.transform.Resize(enlarged),
        Kaug.RandomCrop((im_size, im_size), padding=8),
        Kaug.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        Kaug.RandomHorizontalFlip(),
        Kaug.Normalize(mean=self.mean, std=self.std),
    ]
    self.aug = torch.nn.Sequential(*stages)
def __init__(self, cutn):
    """Store ``cutn`` and build the fixed augmentation chain and noise factor."""
    super().__init__()
    self.cutn = cutn

    # A RandomSolarize(0.01, 0.01, p=0.7) stage was disabled in the original
    # and is intentionally not part of the chain.
    chain = (
        K.RandomHorizontalFlip(p=0.5),
        K.ColorJitter(hue=0.01, saturation=0.01, p=0.7),
        K.RandomSharpness(0.3, p=0.4),
        K.RandomAffine(degrees=30, translate=0.1, p=0.8, padding_mode='border'),
        K.RandomPerspective(0.2, p=0.4),
    )
    self.augs = nn.Sequential(*chain)
    self.noise_fac = 0.1
def __init__(
    self,
    net,
    image_size,
    hidden_layer = -2,
    projection_size = 256,
    projection_hidden_size = 2048,
    augment_fn = None,
    augment_fn2 = None,
    moving_average_decay = 0.99,
    ppm_num_layers = 1,
    ppm_gamma = 2,
    distance_thres = 0.1,  # the paper uses 0.7, but that leads to nearly all positive hits. need clarification on how the coordinates are normalized before distance calculation.
    similarity_temperature = 0.3,
    alpha = 1.
):
    """Build the online encoder, pixel-propagation module and augmentations.

    Args:
        net: backbone wrapped by ``NetWrapper``; its device decides where
            this module and the mock input live.
        image_size: side length of the square crops and of the mock input.
        hidden_layer: forwarded to ``NetWrapper`` as ``layer``.
        projection_size, projection_hidden_size: forwarded to ``NetWrapper``;
            ``projection_size`` is also the ``chan`` of the ``PPM`` module.
        augment_fn: first augmentation; falls back to the default pipeline
            via ``default(...)``.
        augment_fn2: second augmentation; falls back to the first one.
        moving_average_decay: handed to ``EMA``.
        ppm_num_layers, ppm_gamma: forwarded to ``PPM``.
        distance_thres, similarity_temperature, alpha: stored unchanged.
    """
    super().__init__()

    # default SimCLR augmentation
    DEFAULT_AUG = nn.Sequential(
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225]))
    )

    self.augment1 = default(augment_fn, DEFAULT_AUG)
    self.augment2 = default(augment_fn2, self.augment1)

    self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size, layer=hidden_layer)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.distance_thres = distance_thres
    self.similarity_temperature = similarity_temperature
    self.alpha = alpha

    self.propagate_pixels = PPM(
        chan = projection_size,
        num_layers = ppm_num_layers,
        gamma = ppm_gamma
    )

    # get device of network and make wrapper same device
    device = get_module_device(net)
    # Without this move the freshly created submodules stay on the CPU while
    # the mock input below is created on `device`.
    self.to(device)

    # send a mock image tensor to instantiate singleton parameters
    self.forward(torch.randn(2, 3, image_size, image_size, device=device))
def default_aug(image_size: Tuple[int, int] = (360, 360)) -> nn.Module:
    """Return the default augmentation pipeline.

    Stages, in order: mild color jitter (p=0.8), vertical flip, horizontal
    flip, occasional 3x3 Gaussian blur (p=0.1), random resized crop to
    ``image_size`` with scale ``(0.5, 1)``, then normalization with the
    mean/std values hard-coded below.
    """
    jitter = aug.ColorJitter(contrast=0.1, brightness=0.1, saturation=0.1, p=0.8)
    vertical_flip = aug.RandomVerticalFlip()
    horizontal_flip = aug.RandomHorizontalFlip()
    blur = RandomApply(filters.GaussianBlur2d((3, 3), (0.5, 0.5)), p=0.1)
    crop = aug.RandomResizedCrop(size=image_size, scale=(0.5, 1))
    normalize = aug.Normalize(
        mean=torch.tensor([0.485, 0.456, 0.406]),
        std=torch.tensor([0.229, 0.224, 0.225]),
    )
    return nn.Sequential(jitter, vertical_flip, horizontal_flip, blur, crop, normalize)
def default_augmentation(image_size: Tuple[int, int] = (224, 224)) -> nn.Module:
    """Return the default SimCLR-style augmentation pipeline.

    Stages, in order: resize to ``image_size``, color jitter (applied with
    p=0.8), random grayscale, horizontal flip, occasional 3x3 Gaussian blur,
    random resized crop to ``image_size``, and normalization with the
    mean/std values hard-coded below.
    """
    resize = tf.Resize(size=image_size)
    jitter = RandomApply(aug.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8)
    grayscale = aug.RandomGrayscale(p=0.2)
    flip = aug.RandomHorizontalFlip()
    blur = RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1)
    crop = aug.RandomResizedCrop(size=image_size)
    normalize = aug.Normalize(
        mean=torch.tensor([0.485, 0.456, 0.406]),
        std=torch.tensor([0.229, 0.224, 0.225]),
    )
    return nn.Sequential(resize, jitter, grayscale, flip, blur, crop, normalize)
def __init__(self, opt):
    """Wrap the dataset described by ``opt['dataset']`` and build two pipelines.

    ``self.aug`` holds the color/blur augmentations; ``self.rrc`` holds the
    horizontal flip followed by a random resized crop to a square of side
    ``opt['crop_size']``.
    """
    super().__init__()
    self.wrapped_dataset = create_dataset(opt['dataset'])
    self.cropped_img_size = opt['crop_size']
    self.includes_labels = opt['includes_labels']

    self.aug = nn.Sequential(
        RandomApply(augs.ColorJitter(0.4, 0.4, 0.4, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
    )

    crop_hw = (self.cropped_img_size, self.cropped_img_size)
    self.rrc = nn.Sequential(
        augs.RandomHorizontalFlip(),
        augs.RandomResizedCrop(crop_hw),
    )
def __init__(self, viz: bool = False):
    """Store the ``viz`` flag and build the geometric augmentations and denormalizer."""
    super().__init__()
    self.viz = viz

    # Disabled alternatives kept from the original for reference:
    #   self.geometric = [K.augmentation.RandomAffine(60., p=0.75)]
    #   K.augmentation.GaussianBlur((3, 3), (0.1, 2.0), p=1.0)
    #   K.augmentation.ColorJitter(0.01, 0.01, 0.01, 0.01, p=0.25)
    geometric_ops = [
        augmentation.RandomRotation(degrees=30.),
        augmentation.RandomPerspective(distortion_scale=0.4),
        augmentation.RandomResizedCrop((224, 224)),
        augmentation.RandomHorizontalFlip(p=0.5),
        augmentation.RandomVerticalFlip(p=0.5),
    ]
    self.augmentations = nn.Sequential(*geometric_ops)

    dataset_mean = Tensor(DATASET_IMAGE_MEAN)
    dataset_std = Tensor(DATASET_IMAGE_STD)
    self.denorm = augmentation.Denormalize(dataset_mean, dataset_std)
# Dataset constructor: fills self.id_map from "<celeba_folder>/identity_CelebA.txt"
# (image stem -> integer id), reads the attribute names from the second line of
# "<celeba_folder>/labels/list_attr_celeba.txt", and collects image paths and
# integer label lists for the requested `mode`:
#   "train": image number < 162771
#   "val":   162771 <= image number < 182638
#   "test":  image number >= 182638
#   "full":  every image
# Images whose "<mask_folder>/id-<identity>" path does not exist are skipped.
# `use_transforms` is not referenced in the visible body.
# NOTE(review): this block does not parse as shown. The iterable inside
# `enumerate(` and the argument of `TF.to_tensor(` are truncated (both leave an
# unbalanced closing parenthesis). Presumably the remaining lines of `label_f`
# and a loaded "unicorn.jpg" image were intended -- TODO restore from the
# original file before relying on this code.
def __init__(self, celeba_folder, mask_folder, mode="train", use_transforms=False): super().__init__() self.images = [] self.labels = [] self.mode = mode self.mask_folder = mask_folder self.crop_aug = K.RandomCrop((218, 178), pad_if_needed=True) self.flip_aug = K.RandomHorizontalFlip() self.id_map = {} with open(celeba_folder + "/identity_CelebA.txt") as id_f: for line in id_f: im, id = line.split() self.id_map[im[:-4]] = int(id) with open(celeba_folder + "/labels/list_attr_celeba.txt") as label_f: id_count = label_f.readline().strip("\n") self.attributes = np.array(label_f.readline().strip("\n").split()) for i, line in enumerate("\n")[:-1]): image_data = line.split() image = image_data[0] if not os.path.exists(f"{mask_folder}" f"/id-{self.id_map[image[:-4]]}"): continue image_labels = [int(label) for label in image_data[1:]] image_n = int(image[:-4]) train_cond = mode == "train" and image_n < 162771 val_cond = mode == "val" and image_n >= 162771 and image_n < 182638 test_cond = mode == "test" and image_n >= 182638 full_cond = mode == "full" if train_cond or val_cond or test_cond or full_cond: self.images.append(celeba_folder + "/images/" + image) self.labels.append(image_labels) self.alt_background = TF.to_tensor("unicorn.jpg")) self.length = len(self.images)
def __init__(self, model, imageSize, embeddingLayer=-2, projectionDim=256, projectionHiddenDim=4096, emaDecay=0.99):
    """Create the augmentation pipeline, online/target encoders, predictor and EMA.

    The target encoder starts as a deep copy of the online encoder.
    """
    super(BYOL, self).__init__()

    # Default SimCLR augmentations
    crop_size = (imageSize, imageSize)
    simclr_stages = [
        RandomApply(augmentation.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augmentation.RandomGrayscale(p=0.2),
        augmentation.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augmentation.RandomResizedCrop(crop_size),
        color.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]),
                        std=torch.tensor([0.229, 0.224, 0.225])),
    ]
    self.augment = nn.Sequential(*simclr_stages)

    # Initialize models, predictors and EMA
    self.onlineEncoder = ModelWrapper(model, projectionDim, projectionHiddenDim, embeddingLayer)
    self.onlinePredictor = MLP(projectionDim, projectionDim, projectionHiddenDim)
    self.targetEncoder = copy.deepcopy(self.onlineEncoder)
    self.targetEMA = EMA(emaDecay)
# AudioClassifier constructor: sets up a SpecAugmentation stage, a timm model
# created from `model_name` (pretrained, single input channel), adaptive average
# pooling, two dropout layers, a linear head with `n_out` outputs, calls
# self.init_weight(), and builds a kornia transform holding a horizontal flip.
# NOTE(review): this block does not parse as shown. The assignment target before
# `= timm.create_model(...)` and the right-hand side of `n_features =` are
# missing. Presumably the backbone attribute and its feature width were named
# there -- TODO restore from the original file.
def __init__(self, model_name, n_out): super(AudioClassifier, self).__init__() # Spec augmenter self.spec_augmenter = SpecAugmentation(time_drop_width=80, time_stripes_num=2, freq_drop_width=16, freq_stripes_num=2) = timm.create_model(model_name, pretrained=True, in_chans=1) self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) self.dropout1 = nn.Dropout(0.3) self.dropout2 = nn.Dropout(0.3) n_features = self.net_classifier = nn.Linear(n_features, n_out) self.init_weight() # note: kornia's random crop expects (h, w) ordering self.transform = nn.Sequential(K.RandomHorizontalFlip(p=0.1), # K.GaussianBlur(7, p=0.5), # K.RandomCrop((round(IMAGE_HEIGHT*0.7), round(IMAGE_WIDTH*0.7)),p=0.3) )
def __init__(self, net, image_size=32, layer_name_list=None, projection_size=256, projection_hidden_size=4096,
             augment_fn=None, moving_average_decay=0.99, device_='cuda', number_of_classes=10,
             mean_data=torch.tensor([0.485, 0.456, 0.406]), std_data=torch.tensor([0.229, 0.224, 0.225])):
    """Build the augmentation, online encoder and the three online predictors.

    Args:
        net: backbone wrapped by ``NetWrapper``.
        image_size: side length of the random resized crop and mock input.
        layer_name_list: forwarded to ``NetWrapper``; ``None`` selects
            ``[-2]``.
        projection_size, projection_hidden_size: sizes handed to
            ``NetWrapper`` and the main ``MLP`` predictor.
        augment_fn: passed to ``default(...)`` with the built-in pipeline
            as the fallback.
        moving_average_decay: handed to ``EMA``.
        device_: device every submodule and the mock input are moved to.
        number_of_classes: not referenced in this constructor.
        mean_data, std_data: normalization statistics for the default
            augmentation.
    """
    super().__init__()

    # A fresh list per call, so no list object is shared between instances
    # through the signature default.
    if layer_name_list is None:
        layer_name_list = [-2]

    # default SimCLR augmentation
    DEFAULT_AUG = nn.Sequential(
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=mean_data, std=std_data))

    self.augment = default(augment_fn, DEFAULT_AUG)
    self.device = device_

    self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size,
                                     layer_name_list=layer_name_list).to(self.device)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size).to(self.device)
    self.online_predictor1 = MLP(projection_size, projection_size, 512).to(self.device)
    self.online_predictor2 = MLP(projection_size, projection_size, 512).to(self.device)

    # send a mock image tensor to instantiate singleton parameters
    self.forward(torch.randn(2, 3, image_size, image_size).to(self.device))
def __init__(self, utes, mask, ct, length, opt):
    """Store the volumes and configure the batch-consistent spatial augmentation.

    ``ct`` is kept as ``self.label``; the number of volumes is taken from
    ``utes.shape[0]`` and the batch size from ``opt.trainBatchSize``.
    """
    super(TrainDataset, self).__init__()
    self.utes = utes
    self.label = ct
    self.mask = mask
    self.length = length
    self.num_vols = utes.shape[0]
    self.batch_size = opt.trainBatchSize

    # every stage samples one set of parameters for the whole batch
    affine = ka.RandomAffine(45, translate=(0.1, 0.1), scale=(0.85, 1.15), shear=(0.1, 0.1),
                             same_on_batch=True)
    vertical_flip = ka.RandomVerticalFlip(same_on_batch=True)
    horizontal_flip = ka.RandomHorizontalFlip(same_on_batch=True)
    self.spatial = nn.Sequential(affine, vertical_flip, horizontal_flip)

    self.dim = 2
    self.counter = 0
def __init__(
    self,
    net,
    image_size,
    hidden_layer=-2,
    projection_size=256,
    projection_hidden_size=4096,
    moving_average_decay=0.99,
    use_momentum=True,
    structural_mlp=False,
):
    """Build the online encoder, augmentations and predictor, then run a mock pass.

    Args:
        net: backbone wrapped by ``NetWrapper``; its device decides where
            this module and the mock inputs live.
        image_size: side length of the random resized crop and mock inputs.
        hidden_layer: forwarded to ``NetWrapper`` as ``layer``.
        projection_size, projection_hidden_size: sizes handed to
            ``NetWrapper`` and the ``MLP`` predictor.
        moving_average_decay: handed to ``EMA``.
        use_momentum: stored unchanged.
        structural_mlp: forwarded to ``NetWrapper`` as ``use_structural_mlp``.
    """
    super().__init__()

    self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size, layer=hidden_layer,
                                     use_structural_mlp=structural_mlp)

    augmentations = [
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
    ]
    self.aug = nn.Sequential(*augmentations)

    self.use_momentum = use_momentum
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size)

    # get device of network and make wrapper same device
    device = get_module_device(net)
    # Without this move the predictor created above stays on the CPU while
    # the mock inputs below are created on `device`.
    self.to(device)

    # send a mock image tensor to instantiate singleton parameters
    self.forward(torch.randn(2, 3, image_size, image_size, device=device),
                 torch.randn(2, 3, image_size, image_size, device=device))
def __init__(self, net, image_size, hidden_layer=-2, projection_size=256, projection_hidden_size=4096,
             augment_fn=None, augment_fn2=None, moving_average_decay=0.99):
    """Build the augmentations, online encoder and predictor, then run a mock pass.

    Args:
        net: backbone wrapped by ``NetWrapper``; its device decides where
            this module and the mock input live.
        image_size: side length of the random resized crop and mock input.
        hidden_layer: forwarded to ``NetWrapper`` as ``layer``.
        projection_size, projection_hidden_size: sizes handed to
            ``NetWrapper`` and the ``MLP`` predictor.
        augment_fn: first augmentation; falls back to the default pipeline
            via ``default(...)``.
        augment_fn2: second augmentation; falls back to the first one.
        moving_average_decay: handed to ``EMA``.
    """
    super().__init__()

    # default SimCLR augmentation
    DEFAULT_AUG = nn.Sequential(
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225])))

    self.augment1 = default(augment_fn, DEFAULT_AUG)
    self.augment2 = default(augment_fn2, self.augment1)

    self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size, layer=hidden_layer)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size)

    # get device of network and make wrapper same device
    device = get_module_device(net)
    # Without this move the predictor created above stays on the CPU while
    # the mock input below is created on `device`.
    self.to(device)

    # send a mock image tensor to instantiate singleton parameters
    self.forward(torch.randn(2, 3, image_size, image_size, device=device))
def default_train_transforms():
    """Return the default training transforms keyed by pipeline hook name.

    The kornia variant is used when kornia is available and the
    ``FLASH_TESTING`` environment variable is not ``"1"``; otherwise the
    torchvision variant is returned.
    """
    image_size = ImageClassificationData.image_size
    use_kornia = _KORNIA_AVAILABLE and os.getenv("FLASH_TESTING", "0") != "1"

    if not use_kornia:
        from torchvision import transforms as T  # noqa F811

        return {
            "pre_tensor_transform": nn.Sequential(T.RandomResizedCrop(image_size), T.RandomHorizontalFlip()),
            "post_tensor_transform": T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        }

    # Better approach as all transforms are applied on tensor directly
    crop_and_flip = nn.Sequential(K.RandomResizedCrop(image_size), K.RandomHorizontalFlip())
    normalize = nn.Sequential(
        K.Normalize(torch.tensor([0.485, 0.456, 0.406]), torch.tensor([0.229, 0.224, 0.225])),
    )
    return {
        "post_tensor_transform": crop_and_flip,
        "per_batch_transform_on_device": normalize,
    }
def get_frame_aug(args):
    """Return a callable applying the frame augmentations named in ``args.frame_aug``.

    ``'cj'`` adds a color jitter and ``'flip'`` a batch-consistent horizontal
    flip; normalization is always appended. The returned callable routes the
    input through ``patch_grid`` (``'grid'``), ``n_patches``
    (``'randpatch'``) or the plain transform.
    """
    stages = []
    if 'cj' in args.frame_aug:
        _cj = 0.1
        # a RandomGrayscale(p=0.2) stage was disabled here in the original
        stages.append(K.ColorJitter(_cj, _cj, _cj, 0))
    if 'flip' in args.frame_aug:
        stages.append(K.RandomHorizontalFlip(same_on_batch=True))
    stages.append(kornia.color.Normalize(mean=IMG_MEAN, std=IMG_STD))

    train_transform = nn.Sequential(*stages)
    print('Frame augs:', train_transform, args.frame_aug)

    # HACK if you up value the args field, it is a pointer!!!
    patch_size = np.array(args.patch_size)

    def aug(x):
        if 'grid' in args.frame_aug:
            return patch_grid(x, transform=train_transform, shape=patch_size, stride=args.pstride)
        if 'randpatch' in args.frame_aug:
            return n_patches(x, args.npatch, transform=train_transform, shape=patch_size,
                             scale=args.npatch_scale)
        return train_transform(x)

    return aug
def __init__(self, net, image_size, hidden_layer = -2, projection_size = 256, projection_hidden_size = 4096, augment_fn = None, moving_average_decay = 0.99):
    """Build the augmentation, online encoder and predictor, then run a mock pass.

    ``augment_fn`` replaces the built-in SimCLR-style pipeline through
    ``default(...)``; the target encoder starts out as ``None``.
    """
    super().__init__()

    # default SimCLR augmentation
    simclr_stages = [
        RandomApply(augs.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.8),
        augs.RandomGrayscale(p=0.2),
        augs.RandomHorizontalFlip(),
        RandomApply(filters.GaussianBlur2d((3, 3), (1.5, 1.5)), p=0.1),
        augs.RandomResizedCrop((image_size, image_size)),
    ]
    DEFAULT_AUG = nn.Sequential(*simclr_stages)
    self.augment = default(augment_fn, DEFAULT_AUG)

    self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size, layer=hidden_layer)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size)

    # send a mock image tensor to instantiate singleton parameters
    mock_batch = torch.randn(2, 3, image_size, image_size)
    self.forward(mock_batch)
def get_frame_transform(frame_transform_str, img_size, cuda=True):
    """Return the list of frame transforms selected by ``frame_transform_str``.

    Recognized markers: ``'gray'``, ``'crop'`` (otherwise a plain resize to
    ``img_size`` is used), ``'cj'`` and ``'flip'``. ``cuda`` is accepted but
    not used here.
    """
    tt = []

    if 'gray' in frame_transform_str:
        tt.append(K.RandomGrayscale(p=1))

    if 'crop' in frame_transform_str:
        tt.append(K.RandomResizedCrop(img_size, scale=(0.8, 0.95), ratio=(0.7, 1.3)))
    else:
        tt.append(kornia.geometry.transform.Resize((img_size, img_size)))

    if 'cj' in frame_transform_str:
        _cj = 0.1
        # a RandomGrayscale(p=0.2) stage was disabled here in the original
        tt.append(K.ColorJitter(_cj, _cj, _cj, _cj))

    if 'flip' in frame_transform_str:
        tt.append(K.RandomHorizontalFlip())

    return tt
def test_random_flips(self, device, dtype):
    """Forced vertical and horizontal flips must mirror the bbox corner points."""
    tensor_kwargs = dict(device=device, dtype=dtype)
    inp = torch.randn(1, 3, 510, 1020, **tensor_kwargs)
    bbox = torch.tensor([[[355, 10], [660, 10], [660, 250], [355, 250]]], **tensor_kwargs)

    expected_bbox_vertical_flip = torch.tensor(
        [[[355, 499], [660, 499], [660, 259], [355, 259]]], **tensor_kwargs
    )
    expected_bbox_horizontal_flip = torch.tensor(
        [[[664, 10], [359, 10], [359, 250], [664, 250]]], **tensor_kwargs
    )

    aug_ver = K.AugmentationSequential(
        K.RandomVerticalFlip(p=1.0),
        data_keys=["input", "bbox"],
        return_transform=False,
        same_on_batch=False,
    )
    aug_hor = K.AugmentationSequential(
        K.RandomHorizontalFlip(p=1.0),
        data_keys=["input", "bbox"],
        return_transform=False,
        same_on_batch=False,
    )

    out_ver = aug_ver(inp, bbox)
    out_hor = aug_hor(inp, bbox)

    assert_close(out_ver[1], expected_bbox_vertical_flip)
    assert_close(out_hor[1], expected_bbox_horizontal_flip)
def __init__(self, s_color=0.5, p_color=0.8, p_flip=0.5, p_gray=0.2, p_blur=0.5, kernel_min=0.1, kernel_max=2.) -> None:
    """Build the flip / color-jitter / grayscale / blur pipeline.

    Args:
        s_color: color-jitter strength; brightness, contrast and saturation
            use ``0.8 * s_color`` and hue uses ``0.2 * s_color``.
        p_color: probability of applying the color jitter.
        p_flip: probability of the horizontal flip.
        p_gray: probability of the grayscale conversion.
        p_blur: probability of the Gaussian blur.
        kernel_min, kernel_max: sigma range of the blur; ``kernel_max`` also
            determines the (odd) kernel size.
    """
    super(KorniaAugmentationPipeline, self).__init__()

    T_hflip = K.RandomHorizontalFlip(p=p_flip)
    T_gray = K.RandomGrayscale(p=p_gray)

    # The first four positional parameters of ColorJitter are the strengths
    # (brightness, contrast, saturation, hue). The probability must go in by
    # keyword; passing it first shifted every strength by one slot and pushed
    # the hue value into a boolean flag.
    jitter = 0.8 * s_color
    T_color = K.ColorJitter(jitter, jitter, jitter, 0.2 * s_color, p=p_color)

    radius = kernel_max * 2
    kernel_size = int(radius * 2 + 1)  # needs to be odd.
    if kernel_size % 2 == 0:
        # fractional kernel_max values can yield an even size; bump it so the
        # "odd" requirement stated above always holds
        kernel_size += 1
    kernel_size = (kernel_size, kernel_size)
    T_blur = K.GaussianBlur(kernel_size=kernel_size, sigma=(kernel_min, kernel_max), p=p_blur)

    self.transform = nn.Sequential(T_hflip, T_color, T_gray, T_blur)
def __init__(self, net, layer_name_list=None, image_size=32, hidden_layer=-2, projection_size=256,
             projection_hidden_size=4096, augment_fn=None, moving_average_decay=0.99, device_='cuda',
             number_of_classes=10, mean_data=torch.tensor([0.485, 0.456, 0.406]),
             std_data=torch.tensor([0.229, 0.224, 0.225])):
    """Build the augmentation, online encoder and predictor, then run a mock pass.

    Args:
        net: backbone wrapped by ``NetWrapper``.
        layer_name_list: forwarded to ``NetWrapper``; ``None`` selects
            ``['avgpool']``.
        image_size: side length of the random resized crop and mock input.
        hidden_layer: not referenced in this constructor.
        projection_size, projection_hidden_size: sizes handed to
            ``NetWrapper`` and the ``MLP`` predictor.
        augment_fn: passed to ``default(...)`` with the built-in pipeline
            as the fallback.
        moving_average_decay: handed to ``EMA``.
        device_: device every submodule and the mock input are moved to.
        number_of_classes: not referenced in this constructor.
        mean_data, std_data: normalization statistics for the default
            augmentation.
    """
    super().__init__()

    # A fresh list per call, so no list object is shared between instances
    # through the signature default.
    if layer_name_list is None:
        layer_name_list = ['avgpool']

    DEFAULT_AUG = nn.Sequential(
        augs.RandomHorizontalFlip(),
        augs.RandomResizedCrop((image_size, image_size)),
        augs.Normalize(mean=mean_data, std=std_data))

    self.augment = default(augment_fn, DEFAULT_AUG)
    self.device = device_

    self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size,
                                     layer_name_list=layer_name_list).to(self.device)
    self.target_encoder = None
    self.target_ema_updater = EMA(moving_average_decay)

    self.online_predictor = MLP(projection_size, projection_size, projection_hidden_size).to(self.device)

    # send a mock image tensor to instantiate singleton parameters
    self.forward(torch.randn(2, 3, image_size, image_size).to(self.device))
def get_frame_transform(args, cuda=True):
    """Return a callable producing ``(augmented frames, plain first frame)``.

    The training pipeline is assembled from the markers found in
    ``args.frame_transforms`` (``'gray'``, ``'crop'``, ``'cj2'``/``'cj'``,
    ``'flip'``) and finishes with either the patch augmentation from
    ``get_frame_aug`` (when ``args.npatch > 1`` and ``args.frame_aug`` is
    non-empty) or plain normalization. Both outputs are moved to the CPU.
    """
    imsz = args.img_size
    norm_size = kornia.geometry.transform.Resize((imsz, imsz))
    norm_imgs = kornia.color.Normalize(mean=IMG_MEAN, std=IMG_STD)

    tt = []
    fts = args.frame_transforms  # .split(',')

    if 'gray' in fts:
        tt.append(K.RandomGrayscale(p=1))

    if 'crop' in fts:
        tt.append(K.RandomResizedCrop(imsz, scale=(0.8, 0.95), ratio=(0.7, 1.3)))
    else:
        tt.append(norm_size)

    if 'cj2' in fts:
        _cj = 0.2
        tt.extend([
            K.RandomGrayscale(p=0.2),
            K.ColorJitter(_cj, _cj, _cj, _cj),
        ])
    elif 'cj' in fts:
        _cj = 0.1
        # a RandomGrayscale(p=0.2) stage was disabled here in the original
        tt.append(K.ColorJitter(_cj, _cj, _cj, 0))

    if 'flip' in fts:
        tt.append(K.RandomHorizontalFlip())

    if args.npatch > 1 and args.frame_aug != '':
        tt.append(get_frame_aug(args))
    else:
        tt.append(norm_imgs)

    print('Frame transforms:', tt, args.frame_transforms)

    frame_transform_train = transforms.Compose(tt)
    plain = nn.Sequential(norm_size, norm_imgs)

    def with_orig(x):
        if cuda:
            x = x.cuda()

        # rescale inputs that look like raw intensities into [0, 1]
        if x.max() > 1 and x.min() >= 0:
            x = x.float()
            x -= x.min()
            x /= x.max()

        # channels-last input is moved to channels-first
        if x.shape[-1] == 3:
            x = x.permute(0, 3, 1, 2)

        patchify = (not args.visualize) or True
        if patchify:
            augmented = frame_transform_train(x)
        else:
            augmented = plain(x)
        augmented = augmented.cpu()
        reference = plain(x[0:1]).cpu()

        x = augmented, reference
        return x

    return with_orig
# Fragment of a training-setup routine (the enclosing definition is not visible here).
# Visible steps: track the spectral norm of `mod`, build the frozen eval-mode
# generator copy `g_ema` on `device` and call `accumulate(g_ema, generator, 0)`,
# assemble `augment_fn` (reflection padding, horizontal flip, occasional
# affine / rotation / zoom-in crop / erasing), create a ContrastiveLearner around
# `discriminator` only when `args.contrastive > 0`, and derive the two
# regularization ratios from `args.g_reg_every` / `args.d_reg_every`.
# NOTE(review): the first statement presumably belongs to a loop over modules
# that starts above this fragment -- confirm against the full file.
validation.track_spectral_norm(mod) g_ema = Generator( args.size, args.latent_size, args.n_mlp, channel_multiplier=args.channel_multiplier, constant_input=args.constant_input, ).to(device) g_ema.requires_grad_(False) g_ema.eval() accumulate(g_ema, generator, 0) augment_fn = nn.Sequential( nn.ReflectionPad2d(int((math.sqrt(2) - 1) * args.size / 4)), # zoom out augs.RandomHorizontalFlip(), RandomApply(augs.RandomAffine(degrees=0, translate=(0.25, 0.25), shear=(15, 15)), p=0.2), RandomApply(augs.RandomRotation(180), p=0.2), augs.RandomResizedCrop(size=(args.size, args.size), scale=(1, 1), ratio=(1, 1)), RandomApply(augs.RandomResizedCrop(size=(args.size, args.size), scale=(0.5, 0.9)), p=0.1), # zoom in RandomApply(augs.RandomErasing(), p=0.1), ) contrast_learner = ( ContrastiveLearner(discriminator, args.size, augment_fn=augment_fn, hidden_layer=(-1, 0)) if args.contrastive > 0 else None ) g_reg_ratio = args.g_reg_every / (args.g_reg_every + 1) d_reg_ratio = args.d_reg_every / (args.d_reg_every + 1)
def __init__(self, probability: float = 0.1):
    """Remember the flip probability and create the matching horizontal-flip op.

    Args:
        probability: chance with which the flip is applied; also passed to
            ``aug.RandomHorizontalFlip`` as ``p``.
    """
    self._probability = probability

    flip_op = aug.RandomHorizontalFlip(p=probability)
    self._operation = flip_op
class TestVideoSequential:
    """Tests for ``K.VideoSequential`` across both supported data formats."""

    @pytest.mark.parametrize('shape', [(3, 4), (2, 3, 4), (2, 3, 5, 6), (2, 3, 4, 5, 6, 7)])
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_exception(self, shape, data_format, device, dtype):
        """Inputs that are not 5-dimensional must raise an AssertionError."""
        video_aug = K.VideoSequential(
            K.ColorJitter(0.1, 0.1, 0.1, 0.1), data_format=data_format, same_on_frame=True
        )
        with pytest.raises(AssertionError):
            img = torch.randn(*shape, device=device, dtype=dtype)
            video_aug(img)

    @pytest.mark.parametrize(
        'augmentation',
        [
            K.RandomAffine(360, p=1.0),
            K.CenterCrop((3, 3), p=1.0),
            K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0),
            K.RandomCrop((5, 5), p=1.0),
            K.RandomErasing(p=1.0),
            K.RandomGrayscale(p=1.0),
            K.RandomHorizontalFlip(p=1.0),
            K.RandomVerticalFlip(p=1.0),
            K.RandomPerspective(p=1.0),
            K.RandomResizedCrop((5, 5), p=1.0),
            K.RandomRotation(360.0, p=1.0),
            K.RandomSolarize(p=1.0),
            K.RandomPosterize(p=1.0),
            K.RandomSharpness(p=1.0),
            K.RandomEqualize(p=1.0),
            K.RandomMotionBlur(3, 35.0, 0.5, p=1.0),
            K.Normalize(torch.tensor([0.5, 0.5, 0.5]), torch.tensor([0.5, 0.5, 0.5]), p=1.0),
            K.Denormalize(torch.tensor([0.5, 0.5, 0.5]), torch.tensor([0.5, 0.5, 0.5]), p=1.0),
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_augmentation(self, augmentation, data_format, device, dtype):
        """Every single augmentation must pass the reproducibility check."""
        clip = torch.randint(255, (1, 3, 3, 5, 6), device=device, dtype=dtype).repeat(2, 1, 1, 1, 1) / 255.0
        torch.manual_seed(21)
        video_aug = K.VideoSequential(augmentation, data_format=data_format, same_on_frame=True)
        reproducibility_test(clip, video_aug)

    @pytest.mark.parametrize(
        'augmentations',
        [
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0), K.RandomAffine(360, p=1.0)],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0), K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0)],
            [K.RandomAffine(360, p=1.0), kornia.color.BgrToRgb()],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=0.0), K.RandomAffine(360, p=0.0)],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=0.0)],
            [K.RandomAffine(360, p=0.0)],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0), K.RandomAffine(360, p=1.0), K.RandomMixUp(p=1.0)],
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    @pytest.mark.parametrize('random_apply', [1, (1, 1), (1, ), 10, True, False])
    def test_same_on_frame(self, augmentations, data_format, random_apply, device, dtype):
        """With ``same_on_frame=True`` identical input frames must stay identical."""
        video_aug = K.VideoSequential(
            *augmentations, data_format=data_format, same_on_frame=True, random_apply=random_apply
        )

        if data_format == 'BCTHW':
            clip = torch.randn(2, 3, 1, 5, 6, device=device, dtype=dtype).repeat(1, 1, 4, 1, 1)
            output = video_aug(clip)
            if video_aug.return_label:
                output, _ = output
            # the time axis is dim 2: compare each frame with its successor
            for t in range(3):
                assert (output[:, :, t] == output[:, :, t + 1]).all()

        if data_format == 'BTCHW':
            clip = torch.randn(2, 1, 3, 5, 6, device=device, dtype=dtype).repeat(1, 4, 1, 1, 1)
            output = video_aug(clip)
            if video_aug.return_label:
                output, _ = output
            # the time axis is dim 1: compare each frame with its successor
            for t in range(3):
                assert (output[:, t] == output[:, t + 1]).all()

        reproducibility_test(clip, video_aug)

    @pytest.mark.parametrize(
        'augmentations',
        [
            [K.RandomAffine(360, p=1.0)],
            [K.ColorJitter(0.1, 0.1, 0.1, 0.1, p=1.0)],
            [K.RandomAffine(360, p=0.0), K.ImageSequential(K.RandomAffine(360, p=0.0))],
        ],
    )
    @pytest.mark.parametrize('data_format', ["BCTHW", "BTCHW"])
    def test_against_sequential(self, augmentations, data_format, device, dtype):
        """Per-frame VideoSequential must match ``nn.Sequential`` on flattened frames."""
        video_aug = K.VideoSequential(*augmentations, data_format=data_format, same_on_frame=False)
        frame_aug = torch.nn.Sequential(*augmentations)

        if data_format == 'BCTHW':
            clip = torch.randn(2, 3, 1, 5, 6, device=device, dtype=dtype).repeat(1, 1, 4, 1, 1)
        if data_format == 'BTCHW':
            clip = torch.randn(2, 1, 3, 5, 6, device=device, dtype=dtype).repeat(1, 4, 1, 1, 1)

        torch.manual_seed(0)
        output_1 = video_aug(clip)

        # re-seed so the plain sequential draws the same random parameters
        torch.manual_seed(0)
        if data_format == 'BCTHW':
            clip = clip.transpose(1, 2)
        output_2 = frame_aug(clip.reshape(-1, 3, 5, 6))
        output_2 = output_2.view(2, 4, 3, 5, 6)
        if data_format == 'BCTHW':
            output_2 = output_2.transpose(1, 2)

        assert (output_1 == output_2).all(), dict(video_aug._params)

    @pytest.mark.jit
    @pytest.mark.skip(reason="turn off due to Union Type")
    def test_jit(self, device, dtype):
        """Scripted and eager VideoSequential must agree (currently skipped)."""
        B, C, D, H, W = 2, 3, 5, 4, 4
        img = torch.ones(B, C, D, H, W, device=device, dtype=dtype)
        op = K.VideoSequential(K.ColorJitter(0.1, 0.1, 0.1, 0.1), same_on_frame=True)
        op_jit = torch.jit.script(op)
        assert_close(op(img), op_jit(img))
mode="train", download=True) val_dataset ="data", mode="validation", download=True) transform = { "per_sample_transform": nn.Sequential( ApplyToKeys( DataKeys.INPUT, nn.Sequential( torchvision.transforms.ToTensor(), Kg.Resize((196, 196)), # SPATIAL Ka.RandomHorizontalFlip(p=0.25), Ka.RandomRotation(degrees=90.0, p=0.25), Ka.RandomAffine(degrees=1 * 5.0, shear=1 / 5, translate=1 / 20, p=0.25), Ka.RandomPerspective(distortion_scale=1 / 25, p=0.25), # PIXEL-LEVEL Ka.ColorJitter(brightness=1 / 30, p=0.25), # brightness Ka.ColorJitter(saturation=1 / 30, p=0.25), # saturation Ka.ColorJitter(contrast=1 / 30, p=0.25), # contrast Ka.ColorJitter(hue=1 / 30, p=0.25), # hue Ka.RandomMotionBlur(kernel_size=2 * (4 // 3) + 1, angle=1, direction=1.0, p=0.25),