from torchvision.transforms import (Compose, RandomApply, ColorJitter, RandomAffine,
                                    RandomPerspective, Resize, ToTensor, Normalize)


def get_train_transforms(img_size: int) -> Compose:
    """Returns data transformations/augmentations for the train dataset.

    Args:
        img_size: The resolution of the input image (img_size x img_size).
    """
    return Compose([
        RandomApply([
            ColorJitter(brightness=0.3, contrast=0.01, saturation=0.01, hue=0),
            RandomAffine(0.1, translate=(0.04, 0.04), scale=(0.04, 0.04),
                         shear=0.01, resample=2),
            # Grayscale(num_output_channels=3),
            # RandomCrop(30),
            RandomPerspective(0.1)
        ]),
        Resize([img_size, img_size], interpolation=3),  # 3 == PIL.Image.BICUBIC
        ToTensor(),
        # RandomApply([
        #     RandomErasing(p=0.2, scale=(0.02, 0.33), ratio=(0.3, 3.3))
        # ]),
        # Normalize with ImageNet statistics
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
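# Usage sketch (illustrative, not from the original source): the pipeline above
# takes a PIL image and returns a normalized 3 x img_size x img_size tensor.
# "sample.jpg" is a hypothetical path.
from PIL import Image

train_transforms = get_train_transforms(img_size=224)
img = Image.open("sample.jpg").convert("RGB")
x = train_transforms(img)   # tensor of shape (3, 224, 224)
batch = x.unsqueeze(0)      # add a batch dimension for the model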
def test_perspective(self):
    from torchvision.transforms import RandomPerspective

    data = [
        self._create_data(26, 34, device=self.device),
        self._create_data(26, 26, device=self.device)
    ]
    for tensor, pil_img in data:
        scripted_transform = torch.jit.script(F.perspective)

        test_configs = [
            [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
        ]
        n = 10
        test_configs += [
            RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n)
            for i in range(n)
        ]

        for r in [0, ]:
            for spoints, epoints in test_configs:
                out_pil_img = F.perspective(pil_img, startpoints=spoints,
                                            endpoints=epoints, interpolation=r)
                out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

                for fn in [F.perspective, scripted_transform]:
                    out_tensor = fn(tensor, startpoints=spoints,
                                    endpoints=epoints, interpolation=r).cpu()

                    num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
                    ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
                    # Tolerance: less than 5% of different pixels
                    self.assertLess(
                        ratio_diff_pixels, 0.05,
                        msg="{}: {}\n{} vs \n{}".format(
                            (r, spoints, epoints), ratio_diff_pixels,
                            out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7]))
def test_perspective(self):
    from torchvision.transforms import RandomPerspective

    data = [
        self._create_data(26, 34, device=self.device),
        self._create_data(26, 26, device=self.device)
    ]
    scripted_transform = torch.jit.script(F.perspective)
    for tensor, pil_img in data:
        test_configs = [
            [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
        ]
        n = 10
        test_configs += [
            RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n)
            for i in range(n)
        ]

        for dt in [None, torch.float32, torch.float64, torch.float16]:
            if dt == torch.float16 and torch.device(self.device).type == "cpu":
                # skip float16 on CPU case
                continue

            if dt is not None:
                tensor = tensor.to(dtype=dt)

            self._test_perspective(tensor, pil_img, scripted_transform, test_configs)

            batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
            if dt is not None:
                batch_tensors = batch_tensors.to(dtype=dt)

            for spoints, epoints in test_configs:
                self._test_fn_on_batch(batch_tensors, F.perspective,
                                       startpoints=spoints, endpoints=epoints,
                                       interpolation=0)
def test_perspective(self):
    from torchvision.transforms import RandomPerspective

    data = [
        self._create_data(26, 34, device=self.device),
        self._create_data(26, 26, device=self.device)
    ]
    scripted_transform = torch.jit.script(F.perspective)
    for tensor, pil_img in data:
        test_configs = [
            [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
        ]
        n = 10
        test_configs += [
            RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n)
            for i in range(n)
        ]

        for dt in [None, torch.float32, torch.float64, torch.float16]:
            if dt == torch.float16 and torch.device(self.device).type == "cpu":
                # skip float16 on CPU case
                continue

            if dt is not None:
                tensor = tensor.to(dtype=dt)

            self._test_perspective(tensor, pil_img, scripted_transform, test_configs)

            batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
            if dt is not None:
                batch_tensors = batch_tensors.to(dtype=dt)

            for spoints, epoints in test_configs:
                self._test_fn_on_batch(
                    batch_tensors, F.perspective,
                    startpoints=spoints, endpoints=epoints, interpolation=NEAREST
                )

    # assert changed type warning
    spoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
    epoints = [[3, 2], [32, 3], [30, 24], [2, 25]]
    with self.assertWarnsRegex(UserWarning,
                               r"Argument interpolation should be of type InterpolationMode"):
        res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=2)

    res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR)
    self.assertTrue(res1.equal(res2))
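# Context note (assumption about the torchvision API, inferred from the test above):
# NEAREST and BILINEAR are module-level aliases for the InterpolationMode enum, and
# the legacy integer codes (0 = NEAREST, 2 = BILINEAR, 3 = BICUBIC) still work but
# trigger the deprecation warning asserted above.
from torchvision.transforms import InterpolationMode

NEAREST = InterpolationMode.NEAREST
BILINEAR = InterpolationMode.BILINEAR
BICUBIC = InterpolationMode.BICUBIC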
def main() -> None:
    size = 500
    n_images = 15000
    fragment_sizes = [30, 40, 45, 48, 50, 55, 70, 71, 72, 75, 90, 100, 120, 140, 160, 200]
    angles = [5, 6, 7, 8, 10, 15, 16, 17, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100]
    bg_url = 'http://images.cocodataset.org/zips/val2017.zip'
    n_fragments_lo = 3
    n_fragments_hi = 15
    valid = 0.05

    tiles_folder = os.path.join('raw')
    bg_folder = os.path.join('background', 'val2017')
    res_images_path = os.path.join('images')
    res_labels_path = os.path.join('labels')
    classes_path = os.path.join('classes.names')

    download_backgrounds(bg_folder, bg_url)
    name_to_i_class = get_name_to_i_class(classes_path)
    tiles = read_tiles(tiles_folder, name_to_i_class)
    tiles = apply_rotations(tiles, angles)
    tiles = apply_resize(tiles, fragment_sizes)
    tiles = apply_perspective(tiles, RandomPerspective(distortion_scale=0.7, p=0.8))
    data_images = place_fragments_on_bg(tiles, bg_folder,
                                        n_fragments_lo=n_fragments_lo,
                                        n_fragments_hi=n_fragments_hi,
                                        img_size=size)
    data_images = apply_color_jitter(
        data_images,
        ColorJitter(brightness=0.6, contrast=0.8, saturation=0.8, hue=0.3))
    write(data_images, res_images_path, res_labels_path, n_images)
    write_annotations(folder='data/custom/images',
                      train_path='train.txt',
                      valid_path='valid.txt',
                      n_train=int((1 - valid) * n_images),
                      n_valid=int(valid * n_images))
def __getitem__(self, index):
    img = Image.open(self.imgs_path[index])
    if self.train:
        transform = Compose([
            RandomHorizontalFlip(p=0.5),
            RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3),
            RandomResizedCrop((414, 414), scale=(0.5, 1)),
            Resize((414, 414)),
            ToTensor(),
            RandomErasing(p=0.3),
        ])
    else:
        transform = Compose([Resize((414, 414)), ToTensor()])
    img = transform(img)
    if self.train:
        label = self.label[index]
        return img, label
    return img
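# Usage sketch (the dataset class name and constructor below are hypothetical;
# only __getitem__ is shown above): in train mode the dataset yields
# (image, label) pairs, in eval mode bare images, so the two modes need
# separate DataLoader handling.
from torch.utils.data import DataLoader

train_loader = DataLoader(MyImageDataset(train=True), batch_size=16, shuffle=True)
for imgs, labels in train_loader:
    pass  # training step goes here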
def MedT_preprocess_v2_image(img, train, mean=None, std=None):
    """Returns (preprocessed 1x3xHxW tensor, augmented PIL image or None)."""
    if std is None:
        std = [0.5, 0.5, 0.5]
    if mean is None:
        mean = [0.5, 0.5, 0.5]

    degrees = int(random.random() * 360)
    n, m = random.randint(1, 4), random.randint(2, 15)
    shear_translate_aug = ShearTranslate(n, m)
    augmentations = [
        RandomHorizontalFlip(), RandomRotation(degrees), RandomVerticalFlip(),
        RandomPerspective(), RandomBrightness(), RandomContrast(), RandomScale(),
        GaussianBlur(), RandomResizedCrop(), shear_translate_aug
    ]
    # Apply a random subset of the augmentations
    augs_num_to_apply = random.randint(1, len(augmentations))
    augs = random.sample(augmentations, augs_num_to_apply)

    if train:
        augment = Compose([Image.fromarray, *augs])
        normalize = Compose([ToTensor(), Normalize(mean=mean, std=std)])
        augmented = augment(img)
        preprocessed = normalize(augmented).unsqueeze(0)
        return preprocessed, augmented

    preprocessing = Compose([ToTensor(), Normalize(mean=mean, std=std)])
    return preprocessing(img).unsqueeze(0), None
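# Usage sketch (illustrative; assumes img is an HxWx3 uint8 numpy array and the
# custom augmentation classes above are importable):
batch, augmented_pil = MedT_preprocess_v2_image(img, train=True)   # 1x3xHxW tensor + PIL image
eval_batch, _ = MedT_preprocess_v2_image(img, train=False)         # no augmentation at eval time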
from random import randint

from torchvision.transforms import RandomPerspective


def maybe_random_perspective(image):
    # With probability ~3% (randint is inclusive on both ends), apply a
    # perspective warp with a random distortion scale in [0.4, 1.0]
    if randint(0, 100) <= 2:
        return RandomPerspective(distortion_scale=randint(4, 10) / 10, p=1.)(image)
    return image
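# Usage sketch (assumption): since the helper takes and returns a PIL image, it
# can be dropped into a torchvision pipeline with Lambda.
from torchvision.transforms import Compose, Lambda, ToTensor

pipeline = Compose([Lambda(maybe_random_perspective), ToTensor()])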
def apply_data_augmentation(self, img):
    aug = self.params["config"]["augmentation"]
    if aug and self.set_name == "train":
        # Convert to PIL Image
        img = img[:, :, 0] if img.shape[2] == 1 else img
        img = Image.fromarray(img)

        # Apply data augmentation
        if "dpi" in aug.keys() and np.random.rand() < aug["dpi"]["proba"]:
            factor = np.random.uniform(aug["dpi"]["min_factor"], aug["dpi"]["max_factor"])
            img = DPIAdjusting(factor)(img)

        if "perspective" in aug.keys() and np.random.rand() < aug["perspective"]["proba"]:
            scale = np.random.uniform(aug["perspective"]["min_factor"],
                                      aug["perspective"]["max_factor"])
            img = RandomPerspective(distortion_scale=scale, p=1,
                                    interpolation=Image.BILINEAR, fill=255)(img)
        elif "elastic_distortion" in aug.keys() and np.random.rand() < aug["elastic_distortion"]["proba"]:
            magnitude = np.random.randint(1, aug["elastic_distortion"]["max_magnitude"] + 1)
            kernel = np.random.randint(1, aug["elastic_distortion"]["max_kernel"] + 1)
            magnitude_w, magnitude_h = (magnitude, 1) if np.random.randint(2) == 0 else (1, magnitude)
            img = ElasticDistortion(grid=(kernel, kernel),
                                    magnitude=(magnitude_w, magnitude_h),
                                    min_sep=(1, 1))(img)
        elif "random_transform" in aug.keys() and np.random.rand() < aug["random_transform"]["proba"]:
            img = RandomTransform(aug["random_transform"]["max_val"])(img)

        if "dilation_erosion" in aug.keys() and np.random.rand() < aug["dilation_erosion"]["proba"]:
            kernel_h = np.random.randint(aug["dilation_erosion"]["min_kernel"],
                                         aug["dilation_erosion"]["max_kernel"] + 1)
            kernel_w = np.random.randint(aug["dilation_erosion"]["min_kernel"],
                                         aug["dilation_erosion"]["max_kernel"] + 1)
            if np.random.randint(2) == 0:
                img = Erosion((kernel_w, kernel_h), aug["dilation_erosion"]["iterations"])(img)
            else:
                img = Dilation((kernel_w, kernel_h), aug["dilation_erosion"]["iterations"])(img)

        if "contrast" in aug.keys() and np.random.rand() < aug["contrast"]["proba"]:
            factor = np.random.uniform(aug["contrast"]["min_factor"], aug["contrast"]["max_factor"])
            img = adjust_contrast(img, factor)

        if "brightness" in aug.keys() and np.random.rand() < aug["brightness"]["proba"]:
            factor = np.random.uniform(aug["brightness"]["min_factor"], aug["brightness"]["max_factor"])
            img = adjust_brightness(img, factor)

        if "sign_flipping" in aug.keys() and np.random.rand() < aug["sign_flipping"]["proba"]:
            img = SignFlipping()(img)

        # Convert back to a numpy array with an explicit channel dimension
        img = np.array(img)
        img = np.expand_dims(img, axis=2) if len(img.shape) == 2 else img
    return img
def main(model_type='resnet', n_epochs=20, lr=0.0005, batch_size=32):
    """The main function."""
    # Set file paths
    train_img_path = '/Users/emmarydholm/Documents/code/melanoma_classification/data_added_melanoma/train/train_resized'  # path to resized train images, e.g. '/data/train_resized/'
    test_img_path = '/Users/emmarydholm/Documents/code/melanoma_classification/data_added_melanoma/test/test_resized'  # path to resized test images, e.g. '/data/test_resized/'
    data_train = pd.read_csv('data/train_processed.csv')  # processed csv file for train data
    data_test = pd.read_csv('data/test_processed.csv')  # processed csv file for test data

    # Split data_train into train and validation
    n_data_train = len(data_train)
    split = int(0.2 * n_data_train)
    data_train, data_valid = data_train.iloc[split:], data_train.iloc[0:split]

    # Transformation for test and validation data
    transform_valid = Compose([
        CenterCrop(224),  # crops out the center; resulting image shape is 224x224
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Augmentations for the training data
    transform_train = Compose([
        CenterCrop(224),
        RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=3, fill=0),
        RandomVerticalFlip(p=0.5),
        RandomHorizontalFlip(p=0.5),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Create the datasets
    dataset_train = MelanomaDataset(data_train, train_img_path, transform=transform_train)
    dataset_valid = MelanomaDataset(data_valid, train_img_path, transform=transform_valid)
    dataset_test = MelanomaTestDataset(data_test, test_img_path, transform=transform_valid)

    # Create the batches with DataLoader
    training_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(dataset_valid, batch_size=batch_size, shuffle=True)
    # test_loader = DataLoader(dataset_test, batch_size=32, shuffle=False)

    print('There are', len(dataset_train), 'images in the train set and',
          len(dataset_valid), 'in the dev set.')

    # Define device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Define model and freeze the deepest layers
    if model_type == 'resnet':
        model = ResnetModel(9)
        no_train_layers = [model.cnn.layer1, model.cnn.layer2, model.cnn.layer3]
        for layer in no_train_layers:
            for param in layer.parameters():  # freeze parameters, not submodules
                param.requires_grad = False
    elif model_type == 'efficientnet':
        model = EfficientNetModel(9)
        model.cnn._conv_stem.requires_grad_(False)
        no_train_layers = model.cnn._blocks[:28]
        for layer in no_train_layers:
            for param in layer.parameters():
                param.requires_grad = False
    model = model.to(device)

    # Define loss function, optimizer and scheduler
    loss_function = torch.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=1)

    train_loss = []
    validation_loss = []
    train_auc = []
    val_auc = []
    best_auc = 0.0

    # Training loop
    for i in range(n_epochs):
        t1, v1, t_auc, v_auc = train_epoch(training_loader, validation_loader,
                                           model, loss_function, optimizer, device)
        print(f"\r Epoch {i+1}: Training loss = {t1}, Validation loss = {v1}, \
            \n Train auc = {t_auc}, Validation_auc = {v_auc}")
        print('lr = ', optimizer.param_groups[0]['lr'])
        train_loss.append(t1)
        validation_loss.append(v1)
        train_auc.append(t_auc)
        val_auc.append(v_auc)
        scheduler.step(v_auc)

        # Save best model
        if v_auc > best_auc:
            torch.save(model, '/best_model.pt')
            best_auc = v_auc
            print('model saved')

    # Plot the results
    epochs = np.arange(n_epochs)
    fig, ax = plt.subplots()
    ax.set_title('Training and Validation losses')
    ax.plot(epochs, train_loss, label='Train')
    ax.plot(epochs, validation_loss, label='Dev')
    plt.legend()

    fig, ax = plt.subplots()
    ax.set_title('Training and Validation ROC AUC')
    ax.plot(epochs, train_auc, label='Train')
    ax.plot(epochs, val_auc, label='Dev')
    plt.legend()
def main(args):
    perspective = RandomPerspective(args.data_aug_perspective)
    brightness = ColorJitter(args.data_aug_brightness)
    tensor = ToTensor()
    transforms = [perspective, brightness, tensor]
    args.dataset_root.mkdir(parents=True, exist_ok=True)
    if args.data_aug_hflip:
        transforms.insert(0, RandomHorizontalFlip())
    composed = Compose(transforms)

    train_dataset = torchvision.datasets.CIFAR10(
        args.dataset_root, train=True, download=True, transform=composed
    )
    test_dataset = torchvision.datasets.CIFAR10(
        args.dataset_root, train=False, download=False, transform=tensor
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        shuffle=True,
        batch_size=args.batch_size,
        pin_memory=True,
        num_workers=args.worker_count,
    )
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        shuffle=False,
        batch_size=args.batch_size,
        num_workers=args.worker_count,
        pin_memory=True,
    )

    model = CNN(height=32, width=32, channels=3, class_count=10, dropout=args.dropout)

    ## TASK 8: Redefine the criterion to be softmax cross entropy
    criterion = nn.CrossEntropyLoss()

    ## TASK 11: Define the optimizer
    optimizer = optim.SGD(model.parameters(), args.learning_rate, momentum=0.9)

    log_dir = get_summary_writer_log_dir(args)
    print(f"Writing logs to {log_dir}")
    summary_writer = SummaryWriter(str(log_dir), flush_secs=5)
    trainer = Trainer(
        model, train_loader, test_loader, criterion, optimizer, summary_writer, DEVICE
    )
    trainer.train(
        args.epochs,
        args.val_frequency,
        print_frequency=args.print_frequency,
        log_frequency=args.log_frequency,
    )
    summary_writer.close()
def apply_data_augmentation(img, da_config):
    applied_da = list()

    # Convert to PIL Image
    img = img[:, :, 0] if img.shape[2] == 1 else img
    img = Image.fromarray(img)

    # Apply data augmentation
    if "dpi" in da_config.keys() and np.random.rand() < da_config["dpi"]["proba"]:
        # Resample the factor until the resulting size fits the configured bounds
        valid_factor = False
        while not valid_factor:
            factor = np.random.uniform(da_config["dpi"]["min_factor"],
                                       da_config["dpi"]["max_factor"])
            valid_factor = True
            if ("max_width" in da_config["dpi"].keys() and factor * img.size[0] > da_config["dpi"]["max_width"]) or \
               ("max_height" in da_config["dpi"].keys() and factor * img.size[1] > da_config["dpi"]["max_height"]):
                valid_factor = False
            if ("min_width" in da_config["dpi"].keys() and factor * img.size[0] < da_config["dpi"]["min_width"]) or \
               ("min_height" in da_config["dpi"].keys() and factor * img.size[1] < da_config["dpi"]["min_height"]):
                valid_factor = False
        img = DPIAdjusting(factor)(img)
        applied_da.append("dpi: factor {}".format(factor))

    if "perspective" in da_config.keys() and np.random.rand() < da_config["perspective"]["proba"]:
        scale = np.random.uniform(da_config["perspective"]["min_factor"],
                                  da_config["perspective"]["max_factor"])
        img = RandomPerspective(distortion_scale=scale, p=1,
                                interpolation=Image.BILINEAR, fill=255)(img)
        applied_da.append("perspective: scale {}".format(scale))
    elif "elastic_distortion" in da_config.keys() and np.random.rand() < da_config["elastic_distortion"]["proba"]:
        magnitude = np.random.randint(1, da_config["elastic_distortion"]["max_magnitude"] + 1)
        kernel = np.random.randint(1, da_config["elastic_distortion"]["max_kernel"] + 1)
        magnitude_w, magnitude_h = (magnitude, 1) if np.random.randint(2) == 0 else (1, magnitude)
        img = ElasticDistortion(grid=(kernel, kernel),
                                magnitude=(magnitude_w, magnitude_h),
                                min_sep=(1, 1))(img)
        applied_da.append("elastic_distortion: magnitude ({}, {}) - kernel ({}, {})".format(
            magnitude_w, magnitude_h, kernel, kernel))
    elif "random_transform" in da_config.keys() and np.random.rand() < da_config["random_transform"]["proba"]:
        img = RandomTransform(da_config["random_transform"]["max_val"])(img)
        applied_da.append("random_transform")

    if "dilation_erosion" in da_config.keys() and np.random.rand() < da_config["dilation_erosion"]["proba"]:
        kernel_h = np.random.randint(da_config["dilation_erosion"]["min_kernel"],
                                     da_config["dilation_erosion"]["max_kernel"] + 1)
        kernel_w = np.random.randint(da_config["dilation_erosion"]["min_kernel"],
                                     da_config["dilation_erosion"]["max_kernel"] + 1)
        if np.random.randint(2) == 0:
            img = Erosion((kernel_w, kernel_h), da_config["dilation_erosion"]["iterations"])(img)
            applied_da.append("erosion: kernel ({}, {})".format(kernel_w, kernel_h))
        else:
            img = Dilation((kernel_w, kernel_h), da_config["dilation_erosion"]["iterations"])(img)
            applied_da.append("dilation: kernel ({}, {})".format(kernel_w, kernel_h))

    if "contrast" in da_config.keys() and np.random.rand() < da_config["contrast"]["proba"]:
        factor = np.random.uniform(da_config["contrast"]["min_factor"],
                                   da_config["contrast"]["max_factor"])
        img = adjust_contrast(img, factor)
        applied_da.append("contrast: factor {}".format(factor))

    if "brightness" in da_config.keys() and np.random.rand() < da_config["brightness"]["proba"]:
        factor = np.random.uniform(da_config["brightness"]["min_factor"],
                                   da_config["brightness"]["max_factor"])
        img = adjust_brightness(img, factor)
        applied_da.append("brightness: factor {}".format(factor))

    if "color_jittering" in da_config.keys() and np.random.rand() < da_config["color_jittering"]["proba"]:
        img = ColorJitter(
            contrast=da_config["color_jittering"]["factor_contrast"],
            brightness=da_config["color_jittering"]["factor_brightness"],
            saturation=da_config["color_jittering"]["factor_saturation"],
            hue=da_config["color_jittering"]["factor_hue"],
        )(img)
        applied_da.append("jittering")

    if "sign_flipping" in da_config.keys() and np.random.rand() < da_config["sign_flipping"]["proba"]:
        img = SignFlipping()(img)
        applied_da.append("sign_flipping")

    if "crop" in da_config.keys() and np.random.rand() < da_config["crop"]["proba"]:
        new_w, new_h = [int(t * da_config["crop"]["ratio"]) for t in img.size]
        img = RandomCrop((new_h, new_w))(img)
        applied_da.append("random_crop")
    elif "fixed_crop" in da_config.keys() and np.random.rand() < da_config["fixed_crop"]["proba"]:
        img = RandomCrop((da_config["fixed_crop"]["h"], da_config["fixed_crop"]["w"]))(img)
        applied_da.append("fixed_crop")

    # Convert back to a numpy array with an explicit channel dimension
    img = np.array(img)
    img = np.expand_dims(img, axis=2) if len(img.shape) == 2 else img
    return img, applied_da
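# Illustrative config (assumption: the key names are taken from the branches
# above, but the numeric values are made up; img is an HxWxC uint8 numpy array):
da_config = {
    "dpi": {"proba": 0.2, "min_factor": 0.75, "max_factor": 1.25},
    "perspective": {"proba": 0.2, "min_factor": 0.0, "max_factor": 0.3},
    "elastic_distortion": {"proba": 0.2, "max_magnitude": 20, "max_kernel": 3},
    "dilation_erosion": {"proba": 0.2, "min_kernel": 1, "max_kernel": 3, "iterations": 1},
    "contrast": {"proba": 0.2, "min_factor": 0.75, "max_factor": 1.25},
    "brightness": {"proba": 0.2, "min_factor": 0.75, "max_factor": 1.25},
    "sign_flipping": {"proba": 0.1},
    "crop": {"proba": 0.1, "ratio": 0.9},
}

augmented, applied_da = apply_data_augmentation(img, da_config)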
if __name__ == "__main__":
    args = AttrDict({
        "lr": 0.001,
        "batch_size": 8,
        "checkpoint_each": 10,
        "epochs": 100,
        "checkpoints_dir": "./checkpoints_resnet18",
        "log_each": 200,
        "train_transform": Compose([
            Resize((512, 512)),
            RandomHorizontalFlip(p=0.5),
            RandomVerticalFlip(p=0.5),
            RandomPerspective(),
            ToTensor(),
        ]),
        "val_transform": Compose([Resize((512, 512)), ToTensor()]),
        "train": True,
        "model_type": "resnet18"
    })
    main(args)