def main(cfg: Config):
    if cfg.gpu is not None:
        torch.cuda.set_device(cfg.gpu)
    if homura.is_master():
        import rich
        rich.print(cfg)

    # Data: optionally replace the collate function with a mixup-aware one
    vs = DATASET_REGISTRY("imagenet")
    vs.collate_fn = fast_collate if cfg.data.mixup == 0 else gen_mixup_collate(cfg.data.mixup)
    model = MLPMixers(cfg.model.name)(num_classes=1_000,
                                      droppath_rate=cfg.model.droppath_rate)
    train_da = vs.default_train_da.copy()
    if cfg.data.autoaugment:
        train_da.append(AutoAugment())
    # RandomErasing is applied after normalization (post_norm_train_da)
    post_da = [RandomErasing()] if cfg.data.random_erasing else None
    train_loader, test_loader = vs(batch_size=cfg.data.batch_size,
                                   train_da=train_da,
                                   post_norm_train_da=post_da,
                                   train_size=cfg.data.batch_size * 50 if cfg.debug else None,
                                   test_size=cfg.data.batch_size * 50 if cfg.debug else None,
                                   num_workers=12)

    optimizer = homura.optim.AdamW(cfg.optim.lr,
                                   weight_decay=cfg.optim.weight_decay,
                                   multi_tensor=True)
    scheduler = homura.lr_scheduler.CosineAnnealingWithWarmup(cfg.optim.epochs,
                                                              multiplier=cfg.optim.multiplier,
                                                              warmup_epochs=cfg.optim.warmup_epochs,
                                                              min_lr=cfg.optim.min_lr)

    with Trainer(model,
                 optimizer,
                 SmoothedCrossEntropy(cfg.optim.label_smoothing),
                 reporters=[reporters.TensorboardReporter(".")],
                 scheduler=scheduler,
                 use_amp=cfg.amp,
                 use_cuda_nonblocking=True,
                 report_accuracy_topk=5,
                 optim_cfg=cfg.optim,
                 debug=cfg.debug,
                 cfg=cfg.model) as trainer:
        for ep in trainer.epoch_range(cfg.optim.epochs):
            trainer.train(train_loader)
            trainer.test(test_loader)
            trainer.scheduler.step()
            if not cfg.no_save:
                trainer.save(f"outputs/{cfg.model.name}", f"{ep}")

    print(f"Max Test Accuracy={max(trainer.reporter.history('accuracy/test')):.3f}")
def loss_gradient_framework(self, x: "torch.Tensor", y: "torch.Tensor", **kwargs) -> "torch.Tensor":
    # Track gradients w.r.t. the input (Variable is deprecated; a tensor with
    # requires_grad=True behaves the same).
    x_var = Variable(x, requires_grad=True)
    y_cat = torch.argmax(y)
    # Build a batch of 100 randomly erased copies of the single input image.
    transform = RandomErasing(p=1.0, scale=(0.5, 0.5))
    x_mod = torch.stack([transform(x_var[0]) for _ in range(100)], dim=0)
    logits = self.model.net.forward(x_mod)
    loss = self._loss(logits, y_cat.repeat(100))
    self._model.zero_grad()
    loss.backward()
    grads = x_var.grad
    return grads
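# Hedged standalone sketch (added for illustration; not part of the source):
# the method above follows an expectation-over-transformation pattern --
# replicate the input under RandomErasing, average the loss over the copies,
# and read the gradient off the original input tensor. The tiny model and the
# shapes below are placeholder assumptions.
import torch
from torch import nn
from torchvision.transforms import RandomErasing

model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
loss_fn = nn.CrossEntropyLoss()

x = torch.rand(1, 3, 32, 32, requires_grad=True)
y = torch.tensor([3])

erase = RandomErasing(p=1.0, scale=(0.5, 0.5))
# 10 independently erased copies of the same image
x_mod = torch.stack([erase(x[0]) for _ in range(10)], dim=0)
loss = loss_fn(model(x_mod), y.repeat(10))
loss.backward()
grad = x.grad  # gradient w.r.t. the original (un-erased) input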
def add_train_transforms(self, cutout, lighting):
    self.transforms += [
        Resize(self.img_size),
        RandomHorizontalFlip(),
        ImageNetPolicy(),
        torchvision.transforms.ToTensor(),
    ]
    if cutout:
        self.transforms += [
            RandomErasing(p=0.4),
        ]
    if lighting:
        self.transforms += [
            Lighting(0.1, _IMAGENET_PCA['eigval'], _IMAGENET_PCA['eigvec']),
        ]
def get_transform(self, trns_mode):
    transform_list = []
    transform_list.append(Resize((256, 256)))
    if trns_mode == 'train':
        transform_list.append(RandomCrop((224, 224)))
        transform_list.append(RandomGrayscale())
        transform_list.append(RandomOrder(
            [RandomHorizontalFlip(), ColorJitter(), RandomRotation(20)]))
    else:
        transform_list.append(CenterCrop((224, 224)))
    transform_list.append(ToTensor())
    transform_list.append(Normalize(self.mean, self.std))
    if trns_mode == 'train':
        # RandomErasing operates on tensors, so it must come after ToTensor()
        transform_list.append(RandomErasing(value='random'))
    return Compose(transform_list)
def get_transform(self, is_train: bool = False):
    if is_train:
        return Compose([
            RandomResizedCrop(self.crop_size),
            RandomHorizontalFlip(),
            RandomVerticalFlip(),
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            RandomErasing(),
        ])
    else:
        return Compose([
            Resize(self.crop_size),
            CenterCrop(self.crop_size),
            ToTensor(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
def test_torchvision_on_various_input(
        use_different_transformation_per_channel: bool) -> None:
    """
    Tests that the transformation pipeline can be run with out-of-the-box
    torchvision transforms on various types of input: PIL image, 3D tensor,
    4D tensor. Also tests that use_different_transformation_per_channel has
    the correct behavior.
    """
    image_as_pil, image_2d_as_CHW_tensor, image_2d_as_ZCHW_tensor, scan_4d_as_tensor = create_test_images()
    transform = ImageTransformationPipeline(
        [
            CenterCrop(crop_size),
            RandomErasing(),
            RandomAffine(degrees=(10, 12), shear=15, translate=(0.1, 0.3)),
        ],
        use_different_transformation_per_channel,
    )

    # Test PIL image input
    transformed = transform(image_as_pil)
    assert isinstance(transformed, torch.Tensor)
    assert transformed.shape == torch.Size([1, crop_size, crop_size])

    # Test image as [C, H, W] tensor
    transformed = transform(image_2d_as_CHW_tensor.clone())
    assert isinstance(transformed, torch.Tensor)
    assert transformed.shape == torch.Size([1, crop_size, crop_size])

    # Test image as [1, 1, H, W] tensor
    transformed = transform(image_2d_as_ZCHW_tensor)
    assert isinstance(transformed, torch.Tensor)
    assert transformed.shape == torch.Size([1, 1, crop_size, crop_size])

    # Test with a fake 4D scan [C, Z, H, W] -> [5, 4, 32, 32]
    transformed = transform(scan_4d_as_tensor)
    assert isinstance(transformed, torch.Tensor)
    assert transformed.shape == torch.Size([5, 4, crop_size, crop_size])

    # Unless per-channel transformation is enabled, the same transformation
    # should be applied to all slices and channels.
    assert (
        torch.isclose(transformed[0, 0], transformed[1, 1]).all()
        != use_different_transformation_per_channel
    )
def __getitem__(self, index):
    img = Image.open(self.imgs_path[index])
    if self.train:
        transform = Compose([
            RandomHorizontalFlip(p=0.5),
            RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3),  # 3 = PIL BICUBIC
            RandomResizedCrop((414, 414), scale=(0.5, 1)),
            Resize((414, 414)),
            ToTensor(),
            RandomErasing(p=0.3),
        ])
    else:
        transform = Compose([Resize((414, 414)), ToTensor()])
    img = transform(img)
    if self.train:
        label = self.label[index]
        return img, label
    else:
        return img
def maybe_random_erase(image):
    # randint(0, 100) is inclusive on both ends, so this branch fires with
    # probability 8/101 (roughly 8%).
    if randint(0, 100) <= 7:
        return ToPILImage()(RandomErasing(p=1.)(ToTensor()(image)))
    return image
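# Hedged usage sketch (added; not in the source): torchvision's classic
# RandomErasing operates on tensor images, which is why maybe_random_erase
# round-trips through ToTensor() and ToPILImage(). The grey test image below
# is a stand-in created purely for illustration.
from PIL import Image

sample = Image.new("RGB", (64, 64), color=(128, 128, 128))
result = maybe_random_erase(sample)  # a PIL image whether or not erasing fired
assert isinstance(result, Image.Image)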
def __call__(self, image):
    # Convert the PIL image to a tensor, apply RandomErasing repeatedly,
    # then convert back to a PIL image.
    tensor = ToTensor()(image)
    erase = RandomErasing(self.p, self.scale, self.ratio, self.value, self.inplace)
    for _ in range(self.num_repetitions):
        tensor = erase(tensor)
    return ToPILImage()(tensor)
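# Hedged companion sketch (assumed; the original class definition is not
# shown): a constructor that would supply the attributes the __call__ above
# reads. The parameter order and defaults mirror
# torchvision.transforms.RandomErasing; the class name RepeatedErasing and
# the default repetition count are hypothetical.
class RepeatedErasing:
    def __init__(self, num_repetitions=3, p=0.5, scale=(0.02, 0.33),
                 ratio=(0.3, 3.3), value=0, inplace=False):
        self.num_repetitions = num_repetitions  # how many erase passes to apply
        self.p = p
        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace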
def create_transforms_from_config(config: CfgNode,
                                  apply_augmentations: bool,
                                  expand_channels: bool = True) -> ImageTransformationPipeline:
    """
    Defines the image transformation pipeline from a config file. It was designed for
    chest X-ray images, but can be used for other types of image data; the types of
    augmentations to use and their strength are expected to be defined in the config.
    The channel expansion is needed for grayscale images.

    :param config: config yaml file fixing strength and type of augmentation to apply
    :param apply_augmentations: if True, return the transformation pipeline with
        augmentations. Else, disable augmentations, i.e. only resize and center crop the image.
    :param expand_channels: if True, the expand-channel transformation from
        InnerEye.ML.augmentations.image_transforms will be added to the transformations
        passed through the config. This is needed for single-channel images such as CXR.
    """
    transforms: List[Any] = []
    if expand_channels:
        transforms.append(ExpandChannels())
    if apply_augmentations:
        if config.augmentation.use_random_affine:
            transforms.append(
                RandomAffine(
                    degrees=config.augmentation.random_affine.max_angle,
                    translate=(config.augmentation.random_affine.max_horizontal_shift,
                               config.augmentation.random_affine.max_vertical_shift),
                    shear=config.augmentation.random_affine.max_shear))
        if config.augmentation.use_random_crop:
            transforms.append(
                RandomResizedCrop(scale=config.augmentation.random_crop.scale,
                                  size=config.preprocess.resize))
        else:
            transforms.append(Resize(size=config.preprocess.resize))
        if config.augmentation.use_random_horizontal_flip:
            transforms.append(
                RandomHorizontalFlip(p=config.augmentation.random_horizontal_flip.prob))
        if config.augmentation.use_gamma_transform:
            transforms.append(RandomGamma(scale=config.augmentation.gamma.scale))
        if config.augmentation.use_random_color:
            transforms.append(
                ColorJitter(brightness=config.augmentation.random_color.brightness,
                            contrast=config.augmentation.random_color.contrast,
                            saturation=config.augmentation.random_color.saturation))
        if config.augmentation.use_elastic_transform:
            transforms.append(
                ElasticTransform(alpha=config.augmentation.elastic_transform.alpha,
                                 sigma=config.augmentation.elastic_transform.sigma,
                                 p_apply=config.augmentation.elastic_transform.p_apply))
        transforms.append(CenterCrop(config.preprocess.center_crop_size))
        if config.augmentation.use_random_erasing:
            transforms.append(
                RandomErasing(scale=config.augmentation.random_erasing.scale,
                              ratio=config.augmentation.random_erasing.ratio))
        if config.augmentation.add_gaussian_noise:
            transforms.append(
                AddGaussianNoise(p_apply=config.augmentation.gaussian_noise.p_apply,
                                 std=config.augmentation.gaussian_noise.std))
    else:
        transforms += [
            Resize(size=config.preprocess.resize),
            CenterCrop(config.preprocess.center_crop_size),
        ]
    pipeline = ImageTransformationPipeline(transforms)
    return pipeline
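# Hedged sketch (added; not in the source): a minimal yacs CfgNode carrying
# the fields create_transforms_from_config reads. Key names follow the
# attribute accesses in the function above; the numeric values are
# illustrative, mirroring the test config used later in this section. Assumes
# the imports used by the function above are in scope.
from yacs.config import CfgNode

config = CfgNode()
config.preprocess = CfgNode({"resize": 256, "center_crop_size": 224})
aug = CfgNode()
aug.use_random_affine = True
aug.random_affine = CfgNode({"max_angle": 180, "max_horizontal_shift": 0.0,
                             "max_vertical_shift": 0.0, "max_shear": 40})
aug.use_random_crop = True
aug.random_crop = CfgNode({"scale": (0.4, 1.0)})
aug.use_random_horizontal_flip = True
aug.random_horizontal_flip = CfgNode({"prob": 0.5})
aug.use_gamma_transform = True
aug.gamma = CfgNode({"scale": (0.5, 1.5)})
aug.use_random_color = True
aug.random_color = CfgNode({"brightness": 0.2, "contrast": 0.2, "saturation": 0.0})
aug.use_elastic_transform = True
aug.elastic_transform = CfgNode({"alpha": 34, "sigma": 4, "p_apply": 0.4})
aug.use_random_erasing = True
aug.random_erasing = CfgNode({"scale": (0.15, 0.4), "ratio": (0.33, 3.0)})
aug.add_gaussian_noise = True
aug.gaussian_noise = CfgNode({"std": 0.05, "p_apply": 0.5})
config.augmentation = aug

pipeline = create_transforms_from_config(config, apply_augmentations=True)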
def __init__(self):
    super().__init__()
    self.net = Net()
    self.transform = RandomErasing(p=1.0, scale=(0.5, 0.5))
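# Hedged sketch (an assumption, not from the source): one plausible forward()
# for the module above, applying the stored always-on RandomErasing (p=1.0)
# before delegating to the wrapped network.
def forward(self, x: torch.Tensor) -> torch.Tensor:
    return self.net(self.transform(x))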
def test_create_transform_pipeline_from_config() -> None:
    """
    Tests that the pipeline returned by create_cxr_transforms_from_config
    applies the expected sequence of transformations.
    """
    transformation_pipeline = create_cxr_transforms_from_config(
        cxr_augmentation_config, apply_augmentations=True)
    fake_cxr_as_array = np.ones([256, 256]) * 255.
    fake_cxr_as_array[100:150, 100:200] = 1
    fake_cxr_image = PIL.Image.fromarray(fake_cxr_as_array).convert("L")

    all_transforms = [
        ExpandChannels(),
        RandomAffine(degrees=180, translate=(0, 0), shear=40),
        RandomResizedCrop(scale=(0.4, 1.0), size=256),
        RandomHorizontalFlip(p=0.5),
        RandomGamma(scale=(0.5, 1.5)),
        ColorJitter(saturation=0, brightness=0.2, contrast=0.2),
        ElasticTransform(sigma=4, alpha=34, p_apply=0.4),
        CenterCrop(size=224),
        RandomErasing(scale=(0.15, 0.4), ratio=(0.33, 3)),
        AddGaussianNoise(std=0.05, p_apply=0.5),
    ]

    np.random.seed(3)
    torch.manual_seed(3)
    random.seed(3)
    transformed_image = transformation_pipeline(fake_cxr_image)
    assert isinstance(transformed_image, torch.Tensor)

    # Expected pipeline
    image = np.ones([256, 256]) * 255.
    image[100:150, 100:200] = 1
    image = PIL.Image.fromarray(image).convert("L")
    # In the pipeline the image is converted to a tensor before applying the
    # transformations. Do the same here.
    image = ToTensor()(image).reshape([1, 1, 256, 256])

    np.random.seed(3)
    torch.manual_seed(3)
    random.seed(3)
    expected_transformed = image
    for t in all_transforms:
        expected_transformed = t(expected_transformed)
    # The pipeline takes [C, Z, H, W] as input and returns [C, Z, H, W], but
    # the transforms in all_transforms expect and return [Z, C, H, W], so
    # permute the dimensions before comparing.
    expected_transformed = torch.transpose(expected_transformed, 1, 0).squeeze(1)
    assert torch.isclose(expected_transformed, transformed_image).all()

    # Test the evaluation pipeline
    transformation_pipeline = create_cxr_transforms_from_config(
        cxr_augmentation_config, apply_augmentations=False)
    transformed_image = transformation_pipeline(image)
    assert isinstance(transformed_image, torch.Tensor)
    all_transforms = [ExpandChannels(), Resize(size=256), CenterCrop(size=224)]
    expected_transformed = image
    for t in all_transforms:
        expected_transformed = t(expected_transformed)
    expected_transformed = torch.transpose(expected_transformed, 1, 0).squeeze(1)
    assert torch.isclose(expected_transformed, transformed_image).all()
def get_train_eval_loaders(path, batch_size=256):
    """Setup the dataflow:
        - load CIFAR100 train and test datasets
        - setup train/test image transforms:
          images are randomly flipped horizontally and augmented using cutout
        - each mini-batch contains 256 examples
        - setup train/test data loaders

    Returns:
        train_loader, test_loader, eval_train_loader
    """
    train_transform = Compose([
        Pad(4),
        RandomCrop(32),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        RandomErasing(),
    ])

    test_transform = Compose([
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = CIFAR100(root=path, train=True, transform=train_transform, download=True)
    test_dataset = CIFAR100(root=path, train=False, transform=test_transform, download=False)

    # Evaluate on a random subset of the train set, matching the test set size
    train_eval_indices = [random.randint(0, len(train_dataset) - 1)
                          for _ in range(len(test_dataset))]
    train_eval_dataset = Subset(train_dataset, train_eval_indices)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=12,
                              shuffle=True, drop_last=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=12,
                             shuffle=False, drop_last=False, pin_memory=True)
    eval_train_loader = DataLoader(train_eval_dataset, batch_size=batch_size, num_workers=12,
                                   shuffle=False, drop_last=False, pin_memory=True)

    return train_loader, test_loader, eval_train_loader
def cifar10_unsupervised_dataloaders():
    print('Data Preparation')
    train_transform = Compose([
        Pad(4),
        RandomCrop(32, fill=128),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        RandomErasing(scale=(0.1, 0.33)),
    ])

    unsupervised_train_transformation = Compose([
        Pad(4),
        RandomCrop(32, fill=128),
        ToTensor(),
        Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    # RandAugment
    unsupervised_train_transformation.transforms.insert(0, RandAugment(3, 9))

    test_transform = Compose([
        ToTensor(),
        Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Train dataset with and without labels
    cifar10_train_ds = datasets.CIFAR10('/data/', train=True, download=True)
    num_classes = len(cifar10_train_ds.classes)
    print('Loading dataset {0} for training -- Num_samples: {1}, num_classes: {2}'.format(
        datasets.CIFAR10.__name__, len(cifar10_train_ds), num_classes))

    labelled_indices = []
    unlabelled_indices = []
    indices = np.random.permutation(len(cifar10_train_ds))
    class_counters = list([0] * num_classes)
    max_counter = 10000 // num_classes
    # Collect up to max_counter labelled samples per class; once all labelled
    # slots are filled, the remaining samples go to the unlabelled pool.
    for i in indices:
        dp = cifar10_train_ds[i]
        if sum(class_counters) >= max_counter * num_classes:
            unlabelled_indices.append(i)
        else:
            y = dp[1]
            c = class_counters[y]
            if c < max_counter:
                class_counters[y] += 1
                labelled_indices.append(i)
            else:
                unlabelled_indices.append(i)

    # Labelled and unlabelled datasets
    train_labelled_ds = Subset(cifar10_train_ds, labelled_indices)
    train_labelled_ds_t = AddTransform(train_labelled_ds, train_transform)

    # Unlabelled dataset and its augmented version
    train_unlabelled_ds = Subset(cifar10_train_ds, unlabelled_indices)
    train_unlabelled_ds = ConcatDataset([train_unlabelled_ds, train_labelled_ds])
    # Apply transformations to both
    train_unlabelled_ds_t = AddTransform(train_unlabelled_ds, train_transform)
    train_unlabelled_aug_ds_t = AddTransform(train_unlabelled_ds, unsupervised_train_transformation)

    print('Labelled dataset -- Num_samples: {0}, classes: {1},\n'
          'Unsupervised dataset -- Num_samples: {2}, Augmentation -- Num_samples: {3}'.format(
              len(train_labelled_ds_t), num_classes,
              len(train_unlabelled_ds_t), len(train_unlabelled_aug_ds_t)))

    # Data loaders for labelled and unlabelled train datasets
    train_labelled = DataLoader(train_labelled_ds_t, batch_size=64, shuffle=False,
                                num_workers=8, pin_memory=True)
    train_unlabelled = DataLoader(train_unlabelled_ds_t, batch_size=64, shuffle=False,
                                  num_workers=8, pin_memory=True)
    train_unlabelled_aug = DataLoader(train_unlabelled_aug_ds_t, batch_size=64, shuffle=False,
                                      num_workers=8, pin_memory=True)

    # Data loader for test dataset
    cifar10_test_ds = datasets.CIFAR10('/data/', transform=test_transform, train=False, download=True)
    print('Test set -- Num_samples: {0}'.format(len(cifar10_test_ds)))
    test = DataLoader(cifar10_test_ds, batch_size=64, shuffle=False,
                      num_workers=8, pin_memory=True)

    return train_labelled, train_unlabelled, train_unlabelled_aug, test
def __getitem__(self, index: int):
    # while True:
    video, img_file, label, ori_video, frame, fold = self.data[index]
    # try:
    if self.mode == "train":
        label = np.clip(label, self.label_smoothing, 1 - self.label_smoothing)

    # Load image and mask
    img_path = os.path.join(self.data_root, self.crops_dir, video, img_file)
    image = cv2.imread(img_path, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Apply hardcore augmentations without rotation
    if self.mode == "train" and self.hardcore and not self.rotation and not self.random_erase:
        landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy")
        # Remove facial features using landmark information
        if os.path.exists(landmark_path) and random.random() < 0.7:
            landmarks = np.load(landmark_path)
            image = remove_landmark(image, landmarks, self.cutout_fill)
        # Remove facial parts using convex hull
        elif random.random() < 0.4:
            err = 0
            cp = np.copy(image)
            try:
                blackout_convex_hull(cp, detector, predictor, self.cutout_fill)
            except Exception:
                err = 1
            if err == 0:
                image = cp
        # Remove parts of image randomly from 6 bitmasks
        # elif random.random() < 0.1:
        #     binary_mask = mask > 0.4 * 255
        #     masks = prepare_bit_masks((binary_mask * 1).astype(np.uint8))
        #     tries = 6
        #     current_try = 1
        #     while current_try < tries:
        #         bitmap_msk = random.choice(masks)
        #         if label < 0.5 or np.count_nonzero(mask * bitmap_msk) > 20:
        #             mask *= bitmap_msk
        #             image *= np.expand_dims(bitmap_msk, axis=-1)
        #             break
        #         current_try += 1

    # Resize image and remove extra padding outside face
    if self.mode == "train" and self.padding_part > 3:
        image = change_padding(image, self.padding_part)

    # Change label depending on ratio of fake parts in mask
    # TODO: change mask to binary_mask
    # valid_label = np.count_nonzero(mask[mask > 20]) > 32 or label < 0.5
    # valid_label = 1 if valid_label else 0

    # Use built-in transforms passed in
    if self.transforms is not None:
        data = self.transforms(image=image)
        image = data["image"]
        # mask = data["mask"]

    # If hardcore augmentation and rotation are both enabled,
    # apply only random blackout
    # if self.mode == "train" and self.hardcore and self.rotation:
    #     dropout = 0.8 if label > 0.5 else 0.6
    #     if self.rotation:
    #         dropout *= 0.7
    #     elif random.random() < dropout:
    #         blackout_random(image, mask, label)

    rotation = 0
    # Apply a random 90-degree rotation
    if self.mode == "train" and self.rotation:
        rotation = random.randint(0, 3)
        image = rot90(image, rotation)

    # Save a fraction of the train images for inspection
    # if random.random() < 0.1 and conv == True:
    #     os.makedirs("train_images", exist_ok=True)
    #     cv2.imwrite(os.path.join("train_images", video + "_" + str(1 if label > 0.5 else 0) + "_" + img_file),
    #                 image[..., ::-1])

    image = img_to_tensor(image, self.normalize)
    if self.mode == "train" and self.random_erase:
        image = RandomErasing(p=0.5, scale=(0.02, 0.2), value="random")(image)

    return {
        "image": image,
        "label": np.array((label,)),
        "image_name": os.path.join(video, img_file),
        "rotations": rotation,
    }