def test_tranform_dict(self, input):
    """Check that decorating a dictionary-transform pipeline with ``Range`` keeps its output.

    A ``Compose`` of ``FlipD`` and ``ToTensorD`` (each already decorated with
    ``Range``) is applied to ``input`` first. The composed pipeline is then
    decorated three ways (default name, positional name, explicit ``name``
    plus ``methods="__call__"``) and each decorated call must yield a
    ``torch.Tensor`` under the ``"image"`` key that equals the result
    obtained before decoration.
    """
    # Pipeline whose individual dictionary transforms already carry a Range
    pipeline = Compose(
        [
            Range("random flip dict")(FlipD(keys="image")),
            Range()(ToTensorD("image")),
        ]
    )

    # Result computed before the pipeline itself is decorated
    expected = pipeline(input)["image"]

    # Decorate the whole pipeline with NVTX Range (all three before any call)
    decorated = (
        Range()(pipeline),
        Range("Transforms2")(pipeline),
        Range(name="Transforms3", methods="__call__")(pipeline),
    )

    # Apply the decorated pipelines in the same order they were created
    results = [wrapped(input)["image"] for wrapped in decorated]

    # Type checks first, then value checks
    for value in (expected, *results):
        self.assertIsInstance(value, torch.Tensor)
    for value in results:
        np.testing.assert_equal(expected.numpy(), value.numpy())
def test_loss(self, input):
    """Check that decorating a loss module with ``Range`` keeps its value.

    ``input`` is passed through a ``Sigmoid`` to obtain predictions, a random
    0/1 target of the same shape is drawn, and ``BCELoss`` is evaluated once
    before decoration. The loss is then decorated three ways (default name,
    positional name, explicit ``name`` plus ``methods="forward"``) and each
    decorated evaluation must return a ``torch.Tensor`` equal to the first
    result.
    """
    # Create a network and loss
    activation = torch.nn.Sigmoid()
    criterion = torch.nn.BCELoss()
    pred = activation(input)
    target = torch.empty_like(input).random_(2)

    # Loss evaluation before decoration
    expected = criterion(pred, target)

    # Decorate with NVTX Range (all three before any decorated call)
    decorated = (
        Range()(criterion),
        Range("Loss2")(criterion),
        Range(name="Loss3", methods="forward")(criterion),
    )

    # Loss evaluation with Range
    results = [wrapped(pred, target) for wrapped in decorated]

    # Type checks first, then value checks
    for value in (expected, *results):
        self.assertIsInstance(value, torch.Tensor)
    for value in results:
        np.testing.assert_equal(expected.numpy(), value.numpy())
def test_network(self, input):
    """Check that decorating a network with ``Range`` keeps its forward output.

    A ``Sequential(ReLU, Sigmoid)`` network is run on ``input`` once before
    decoration. It is then decorated three ways (default name, positional
    name, explicit ``name`` plus ``methods="forward"``) and each decorated
    forward pass must return a ``torch.Tensor`` equal to the first result.
    """
    # Create a network
    network = torch.nn.Sequential(torch.nn.ReLU(), torch.nn.Sigmoid())

    # Forward pass before decoration
    expected = network(input)

    # Decorate with NVTX Range (all three before any decorated call)
    decorated = (
        Range()(network),
        Range("Model2")(network),
        Range(name="Model3", methods="forward")(network),
    )

    # Forward with Range
    results = [wrapped(input) for wrapped in decorated]

    # Type checks first, then value checks
    for value in (expected, *results):
        self.assertIsInstance(value, torch.Tensor)
    for value in results:
        np.testing.assert_equal(expected.numpy(), value.numpy())
def test_context_manager(self):
    """Use ``Range`` as a context manager around tensor creation and a forward/backward pass.

    The first (unnamed) range covers creation of the input and target
    tensors; the second range, named ``"Model"``, covers the loss evaluation
    and the backward pass. Besides checking that nothing raises, the test
    asserts that the work done inside the ranges actually took effect: the
    loss is a tensor and the input received a gradient of matching shape.
    """
    model = torch.nn.Sigmoid()
    loss = torch.nn.BCELoss()

    with Range():
        # Named ``data`` rather than ``input`` to avoid shadowing the builtin
        data = torch.randn(3, requires_grad=True)
        target = torch.empty(3).random_(2)

    with Range("Model"):
        output = loss(model(data), target)
        output.backward()

    # The code executed inside the ranges must have run normally
    self.assertIsInstance(output, torch.Tensor)
    self.assertIsNotNone(data.grad)
    self.assertEqual(data.grad.shape, data.shape)
def test_recursive_list_tranforms(self, input, transform_list):
    """Check that recursively decorating a list of transforms keeps the composed output.

    ``transform_list`` is decorated with ``Range(recursive=True)``. The
    original list and the decorated list are each wrapped in ``Compose`` and
    applied to ``input``; both results must be equal.
    """
    ranged_list = Range(recursive=True)(transform_list)

    # Apply transforms (build and run the plain pipeline first)
    plain_pipeline = Compose(transform_list)
    expected = plain_pipeline(input)

    # Apply transforms with Range
    ranged_pipeline = Compose(ranged_list)
    actual = ranged_pipeline(input)

    # Check the outputs
    np.testing.assert_equal(expected.numpy(), actual.numpy())
def test_recursive_tranforms(self, input, transforms):
    """Check that recursively decorating a composed transform keeps its settings and output.

    ``transforms`` is decorated with ``Range(name="Recursive Compose",
    recursive=True)``. Both objects are applied to ``input``; their
    ``map_items``, ``unpack_items`` and ``log_stats`` attributes must match
    and the two outputs must be equal.
    """
    ranged = Range(name="Recursive Compose", recursive=True)(transforms)

    # Apply transforms
    expected = transforms(input)

    # Apply transforms with Range
    actual = ranged(input)

    # Check the configuration attributes, then the outputs
    for attr in ("map_items", "unpack_items", "log_stats"):
        self.assertEqual(getattr(transforms, attr), getattr(ranged, attr))
    np.testing.assert_equal(expected.numpy(), actual.numpy())
def validation(model, loss_fn, amp, dataloader, pre_process, post_process, device, print_step):
    """Run one evaluation pass of ``model`` over ``dataloader``.

    The model is switched to eval mode and the whole pass runs under
    ``torch.no_grad()`` so no autograd graph is built or retained for the
    validation batches. Each step is wrapped in an NVTX ``Range`` named
    ``"Validation Step"``.

    Args:
        model: network to evaluate; called as ``model(x)``.
        loss_fn: callable invoked as ``loss_fn(output, y)``.
        amp: passed to ``autocast(enabled=amp)`` for the forward pass.
        dataloader: sized iterable yielding mappings with ``"image"`` and
            ``"label"`` entries that support ``.to(device)``.
        pre_process: optional callable applied to the image batch after it is
            moved to ``device``; skipped when ``None``.
        post_process: callable turning the model output into predictions that
            are compared element-wise with the labels.
        device: target device for the image and label batches.
        print_step: a progress line is logged whenever ``step % print_step == 0``.

    Returns:
        A ``(loss, accuracy)`` tuple: the per-batch loss weighted by batch
        size (``y.shape[0]``) and the count of elements where prediction
        equals label, each divided by the total number of samples.

    Raises:
        ZeroDivisionError: if the dataloader yields no samples.
    """
    model.eval()
    n_steps = len(dataloader)
    iter_data = iter(dataloader)
    total_acc = 0
    total_loss = 0
    total_n_samples = 0

    # Evaluation never calls backward(); disabling gradient tracking avoids
    # building and holding an autograd graph for every validation batch.
    with torch.no_grad():
        for step in range(n_steps):
            with Range("Validation Step"):
                batch = next(iter_data)
                x = batch["image"].to(device)
                y = batch["label"].to(device)

                if pre_process is not None:
                    x = pre_process(x)

                with autocast(enabled=amp):
                    output = model(x)
                    loss = loss_fn(output, y)

                pred = post_process(output)
                # Number of matching elements in this batch (not yet a ratio)
                acc_data = (pred == y).float().sum().item()
                loss_data = loss.item()

                n_samples = y.shape[0]
                total_acc += acc_data
                # Weight the batch loss by batch size so the final value is
                # a per-sample average even with a smaller last batch.
                total_loss += loss_data * n_samples
                total_n_samples += n_samples

                if step % print_step == 0:
                    logging.info(
                        f"[Validation] "
                        f"Step : {step + 1}/{n_steps} -- "
                        f"valid_loss : {loss_data:.3f}, valid_acc : {acc_data / n_samples:.2f}"
                    )

    return (total_loss / total_n_samples, total_acc / total_n_samples)
def test_wrapper_tranforms(self, input):
    """Check that ``Range`` leaves wrapper transforms (TorchVision / CuCIM) unchanged.

    A list of ``ToTensor``, ``TorchVision``, ``ToCupy``, ``CuCIM`` and
    ``RandCuCIM`` transforms is composed once as-is and once with every
    element individually decorated by a fresh ``Range()``. Both pipelines are
    applied to ``input`` and their results (fetched with ``.get()``) must be
    equal.
    """
    transform_list = [
        ToTensor(),
        TorchVision(name="RandomHorizontalFlip", p=1.0),
        ToCupy(),
        CuCIM(name="image_flip", spatial_axis=-1),
        RandCuCIM(name="rand_image_rotate_90", prob=1.0, max_k=1, spatial_axis=(-2, -1)),
    ]
    plain_pipeline = Compose(transform_list)

    # Decorate each transform with its own Range instance
    ranged_list = []
    for transform in transform_list:
        ranged_list.append(Range()(transform))
    ranged_pipeline = Compose(ranged_list)

    # Apply transforms
    expected = plain_pipeline(input)

    # Apply transforms with Range
    actual = ranged_pipeline(input)

    # Check the outputs
    np.testing.assert_equal(expected.get(), actual.get())
def test_tranform_randomized(self, input):
    """Check ``Range`` on a pipeline mixing deterministic and randomized transforms.

    The pipeline (``Flip``, ``Rotate90``, ``RandAdjustContrast(prob=0.0)``,
    ``RandFlip(prob=1.0)``, ``ToTensor``; some already decorated) is applied
    to ``input`` once, then decorated three ways and applied again. Every
    result must be a ``torch.Tensor`` equal to the first one. Finally, the
    first ``Randomizable`` element of the pipeline, if any, must still be an
    instance of ``RandAdjustContrast``.
    """
    # Compose deterministic and randomized transforms
    pipeline = Compose(
        [
            Range("flip")(Flip()),
            Rotate90(),
            Range()(RandAdjustContrast(prob=0.0)),
            Range("random flip")(RandFlip(prob=1.0)),
            ToTensor(),
        ]
    )

    # Result computed before the pipeline itself is decorated
    expected = pipeline(input)

    # Decorate with NVTX Range (all three before any decorated call)
    decorated = (
        Range()(pipeline),
        Range("Transforms2")(pipeline),
        Range(name="Transforms3", methods="__call__")(pipeline),
    )

    # Apply transforms with Range
    results = [wrapped(input) for wrapped in decorated]

    # Check if the outputs are equal: type checks first, then value checks
    for value in (expected, *results):
        self.assertIsInstance(value, torch.Tensor)
    for value in results:
        np.testing.assert_equal(expected.numpy(), value.numpy())

    # Check if the first randomized is RandAdjustContrast
    first_randomized = next(
        (tran for tran in pipeline.transforms if isinstance(tran, Randomizable)),
        None,
    )
    if first_randomized is not None:
        self.assertIsInstance(first_randomized, RandAdjustContrast)
def training(
    summary,
    model,
    loss_fn,
    optimizer,
    scaler,
    amp,
    dataloader,
    pre_process,
    post_process,
    device,
    writer: SummaryWriter,
    print_step,
):
    """Train ``model`` for one pass over ``dataloader``, annotated with NVTX ranges.

    Every step runs inside a ``Range("Step")``, with batch fetching and device
    transfer inside a nested ``Range("Data Loading")``.

    Args:
        summary: mutable mapping with ``"step"``, ``"epoch"`` and
            ``"n_epochs"`` entries. ``"step"`` is used as the TensorBoard
            global step and incremented after every batch; ``"epoch"`` is
            incremented once, after the last batch.
        model: network to train; called as ``model(x)``.
        loss_fn: callable invoked as ``loss_fn(output, y)``.
        optimizer: optimizer whose gradients are zeroed and stepped each batch.
        scaler: gradient scaler, used only when ``amp`` is truthy.
        amp: enables ``autocast`` and the scaler-based backward/step path.
        dataloader: sized iterable yielding mappings with ``"image"`` and
            ``"label"`` entries that support ``.to(device)``.
        pre_process: optional callable applied to the image batch; skipped
            when ``None``.
        post_process: callable turning the model output into predictions.
        device: target device for the image and label batches.
        writer: receives ``train/loss`` and ``train/accuracy`` scalars.
        print_step: a progress line is logged whenever ``step % print_step == 0``.

    Returns:
        The same ``summary`` object, with its counters updated.
    """
    model.train()
    n_steps = len(dataloader)
    batches = iter(dataloader)

    for step in range(n_steps):
        with Range("Step"):
            with Range("Data Loading"):
                sample = next(batches)
                images = sample["image"].to(device)
                labels = sample["label"].to(device)

            if pre_process is not None:
                images = pre_process(images)

            with autocast(enabled=amp):
                logits = model(images)
                loss = loss_fn(logits, labels)

            optimizer.zero_grad()
            if not amp:
                loss.backward()
                optimizer.step()
            else:
                # Mixed precision: scale the loss, step through the scaler,
                # then let it adjust its scale factor.
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

            predictions = post_process(logits)
            acc_data = (predictions == labels).float().mean().item()
            loss_data = loss.item()

            global_step = summary["step"]
            writer.add_scalar("train/loss", loss_data, global_step)
            writer.add_scalar("train/accuracy", acc_data, global_step)

            if step % print_step == 0:
                logging.info(
                    f"[Training] Epoch: {summary['epoch']}/{summary['n_epochs']}, "
                    f"Step: {step + 1}/{n_steps} -- "
                    f"train_loss: {loss_data:.5f}, train_acc: {acc_data:.3f}"
                )

            summary["step"] = global_step + 1

    summary["epoch"] += 1
    return summary
def main(cfg):
    """Run the full training / validation pipeline described by ``cfg``.

    Sets up logging and TensorBoard under a fresh log directory, builds the
    pre-/post-processing pipelines (each transform wrapped in an NVTX
    ``Range``), datasets, dataloaders, model, optimizer, optional AMP scaler
    and LR scheduler, then alternates training and (optionally) validation
    for ``cfg["n_epochs"]`` epochs. When ``cfg["save"]`` is set, a checkpoint
    ``model_epoch_<k>.pth`` is written after training epoch ``k`` (1-based)
    and, at the end, copied to ``model_final.pth`` (last epoch) and, when
    validating, ``model_best.pth`` (lowest validation loss).

    Args:
        cfg: JSON-serializable mapping. Keys read directly here are
            ``benchmark``, ``backend`` (``"cucim"`` or ``"numpy"``), ``prob``,
            ``dataset_json``, ``data_root``, ``region_size``, ``grid_shape``,
            ``patch_size``, ``use_openslide``, ``num_workers``,
            ``batch_size``, ``pin``, ``pretrain``, ``novograd``, ``lr``,
            ``amp``, ``cos``, ``n_epochs``, ``save``, ``validate`` and
            ``print_step``. ``cfg["amp"]`` is overwritten with the effective
            AMP setting. ``create_log_dir`` and ``set_device`` also receive
            ``cfg``.

    Raises:
        ValueError: if ``cfg["backend"]`` is neither ``"cucim"`` nor
            ``"numpy"``, or if the first training batch is ``None``.
    """
    # -------------------------------------------------------------------------
    # Configs
    # -------------------------------------------------------------------------
    # Create log/model dir
    log_dir = create_log_dir(cfg)

    # Set the logger
    logging.basicConfig(
        format="%(asctime)s %(levelname)2s %(message)s",
        level=logging.INFO,
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    log_name = os.path.join(log_dir, "logs.txt")
    logger = logging.getLogger()
    fh = logging.FileHandler(log_name)
    fh.setLevel(logging.INFO)
    logger.addHandler(fh)

    # Set TensorBoard summary writer
    writer = SummaryWriter(log_dir)

    # Save configs
    logging.info(json.dumps(cfg))
    with open(os.path.join(log_dir, "config.json"), "w") as fp:
        json.dump(cfg, fp, indent=4)

    # Set device cuda/cpu
    device = set_device(cfg)

    # Set cudnn benchmark/deterministic
    if cfg["benchmark"]:
        torch.backends.cudnn.benchmark = True
    else:
        set_determinism(seed=0)

    # -------------------------------------------------------------------------
    # Transforms and Datasets
    # -------------------------------------------------------------------------
    # Pre-processing
    preprocess_cpu_train = None
    preprocess_gpu_train = None
    preprocess_cpu_valid = None
    preprocess_gpu_valid = None
    if cfg["backend"] == "cucim":
        preprocess_cpu_train = Compose([ToTensorD(keys="label")])
        preprocess_gpu_train = Compose(
            [
                Range()(ToCupy()),
                Range("ColorJitter")(
                    RandCuCIM(
                        name="color_jitter",
                        brightness=64.0 / 255.0,
                        contrast=0.75,
                        saturation=0.25,
                        hue=0.04,
                    )
                ),
                Range("RandomFlip")(
                    RandCuCIM(name="image_flip", apply_prob=cfg["prob"], spatial_axis=-1)
                ),
                Range("RandomRotate90")(
                    RandCuCIM(
                        name="rand_image_rotate_90",
                        prob=cfg["prob"],
                        max_k=3,
                        spatial_axis=(-2, -1),
                    )
                ),
                Range()(CastToType(dtype=np.float32)),
                Range("RandomZoom")(RandCuCIM(name="rand_zoom", min_zoom=0.9, max_zoom=1.1)),
                Range("ScaleIntensity")(
                    CuCIM(
                        name="scale_intensity_range",
                        a_min=0.0,
                        a_max=255.0,
                        b_min=-1.0,
                        b_max=1.0,
                    )
                ),
                Range()(ToTensor(device=device)),
            ]
        )
        preprocess_cpu_valid = Compose([ToTensorD(keys="label")])
        preprocess_gpu_valid = Compose(
            [
                Range("ValidToCupyAndCast")(ToCupy(dtype=np.float32)),
                Range("ValidScaleIntensity")(
                    CuCIM(
                        name="scale_intensity_range",
                        a_min=0.0,
                        a_max=255.0,
                        b_min=-1.0,
                        b_max=1.0,
                    )
                ),
                Range("ValidToTensor")(ToTensor(device=device)),
            ]
        )
    elif cfg["backend"] == "numpy":
        preprocess_cpu_train = Compose(
            [
                Range()(ToTensorD(keys=("image", "label"))),
                Range("ColorJitter")(
                    TorchVisionD(
                        keys="image",
                        name="ColorJitter",
                        brightness=64.0 / 255.0,
                        contrast=0.75,
                        saturation=0.25,
                        hue=0.04,
                    )
                ),
                Range()(ToNumpyD(keys="image")),
                Range("RandomFlip")(RandFlipD(keys="image", prob=cfg["prob"], spatial_axis=-1)),
                Range("RandomRotate90")(RandRotate90D(keys="image", prob=cfg["prob"])),
                Range()(CastToTypeD(keys="image", dtype=np.float32)),
                Range("RandomZoom")(
                    RandZoomD(keys="image", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1)
                ),
                Range("ScaleIntensity")(
                    ScaleIntensityRangeD(
                        keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0
                    )
                ),
                Range()(ToTensorD(keys="image")),
            ]
        )
        preprocess_cpu_valid = Compose(
            [
                Range("ValidCastType")(CastToTypeD(keys="image", dtype=np.float32)),
                Range("ValidScaleIntensity")(
                    ScaleIntensityRangeD(
                        keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0
                    )
                ),
                Range("ValidToTensor")(ToTensorD(keys=("image", "label"))),
            ]
        )
    else:
        raise ValueError(
            f"Backend should be either numpy or cucim! ['{cfg['backend']}' is provided.]"
        )

    # Post-processing
    postprocess = Compose(
        [
            Activations(sigmoid=True),
            AsDiscrete(threshold=0.5),
        ]
    )

    # Create MONAI dataset
    train_json_info_list = load_decathlon_datalist(
        data_list_file_path=cfg["dataset_json"],
        data_list_key="training",
        base_dir=cfg["data_root"],
    )
    valid_json_info_list = load_decathlon_datalist(
        data_list_file_path=cfg["dataset_json"],
        data_list_key="validation",
        base_dir=cfg["data_root"],
    )

    train_dataset = PatchWSIDataset(
        data=train_json_info_list,
        region_size=cfg["region_size"],
        grid_shape=cfg["grid_shape"],
        patch_size=cfg["patch_size"],
        transform=preprocess_cpu_train,
        image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM",
    )
    valid_dataset = PatchWSIDataset(
        data=valid_json_info_list,
        region_size=cfg["region_size"],
        grid_shape=cfg["grid_shape"],
        patch_size=cfg["patch_size"],
        transform=preprocess_cpu_valid,
        image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM",
    )

    # DataLoaders
    train_dataloader = DataLoader(
        train_dataset,
        num_workers=cfg["num_workers"],
        batch_size=cfg["batch_size"],
        pin_memory=cfg["pin"],
    )
    valid_dataloader = DataLoader(
        valid_dataset,
        num_workers=cfg["num_workers"],
        batch_size=cfg["batch_size"],
        pin_memory=cfg["pin"],
    )

    # Get sample batch and some info
    first_sample = first(train_dataloader)
    if first_sample is None:
        raise ValueError("First sample is None!")
    for d in ["image", "label"]:
        logging.info(
            f"[{d}] \n"
            f" {d} shape: {first_sample[d].shape}\n"
            f" {d} type: {type(first_sample[d])}\n"
            f" {d} dtype: {first_sample[d].dtype}"
        )
    logging.info(f"Batch size: {cfg['batch_size']}")
    logging.info(f"[Training] number of batches: {len(train_dataloader)}")
    logging.info(f"[Validation] number of batches: {len(valid_dataloader)}")

    # -------------------------------------------------------------------------
    # Deep Learning Model and Configurations
    # -------------------------------------------------------------------------
    # Initialize model
    model = TorchVisionFCModel(
        "resnet18", n_classes=1, use_conv=True, pretrained=cfg["pretrain"]
    )
    model = model.to(device)

    # Loss function
    loss_func = torch.nn.BCEWithLogitsLoss()
    loss_func = loss_func.to(device)

    # Optimizer
    if cfg["novograd"] is True:
        optimizer = Novograd(model.parameters(), lr=cfg["lr"])
    else:
        optimizer = SGD(model.parameters(), lr=cfg["lr"], momentum=0.9)

    # AMP scaler
    cfg["amp"] = cfg["amp"] and monai.utils.get_torch_version_tuple() >= (1, 6)
    if cfg["amp"] is True:
        scaler = GradScaler()
    else:
        scaler = None

    # Learning rate scheduler
    if cfg["cos"] is True:
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=cfg["n_epochs"])
    else:
        scheduler = None

    # -------------------------------------------------------------------------
    # Training/Evaluating
    # -------------------------------------------------------------------------
    train_counter = {"n_epochs": cfg["n_epochs"], "epoch": 1, "step": 1}

    total_valid_time, total_train_time = 0.0, 0.0
    t_start = time.perf_counter()
    # ``np.inf`` rather than the ``np.Inf`` alias, which NumPy 2.0 removed
    metric_summary = {"loss": np.inf, "accuracy": 0, "best_epoch": 1}

    # Training/Validation Loop
    for _ in range(cfg["n_epochs"]):
        t_epoch = time.perf_counter()
        # ``training`` increments ``train_counter["epoch"]`` before it returns,
        # so remember the number of the epoch being run now; checkpoints,
        # metrics and logs below must refer to this epoch, not the next one.
        epoch = train_counter["epoch"]
        logging.info(f"[Training] learning rate: {optimizer.param_groups[0]['lr']}")

        # Training
        with Range("Training Epoch"):
            train_counter = training(
                train_counter,
                model,
                loss_func,
                optimizer,
                scaler,
                cfg["amp"],
                train_dataloader,
                preprocess_gpu_train,
                postprocess,
                device,
                writer,
                cfg["print_step"],
            )
        if scheduler is not None:
            scheduler.step()
        if cfg["save"]:
            # Same ``.pth`` suffix and epoch numbering as the final copy step
            torch.save(
                model.state_dict(),
                os.path.join(log_dir, f"model_epoch_{epoch}.pth"),
            )
        t_train = time.perf_counter()
        train_time = t_train - t_epoch
        total_train_time += train_time

        # Validation
        if cfg["validate"]:
            with Range("Validation"):
                valid_loss, valid_acc = validation(
                    model,
                    loss_func,
                    cfg["amp"],
                    valid_dataloader,
                    preprocess_gpu_valid,
                    postprocess,
                    device,
                    cfg["print_step"],
                )
            t_valid = time.perf_counter()
            valid_time = t_valid - t_train
            total_valid_time += valid_time
            if valid_loss < metric_summary["loss"]:
                metric_summary["loss"] = min(valid_loss, metric_summary["loss"])
                metric_summary["accuracy"] = max(valid_acc, metric_summary["accuracy"])
                metric_summary["best_epoch"] = epoch
            writer.add_scalar("valid/loss", valid_loss, epoch)
            writer.add_scalar("valid/accuracy", valid_acc, epoch)

            logging.info(
                f"[Epoch: {epoch}/{cfg['n_epochs']}] loss: {valid_loss:.3f}, accuracy: {valid_acc:.2f}, "
                f"time: {t_valid - t_epoch:.1f}s (train: {train_time:.1f}s, valid: {valid_time:.1f}s)"
            )
        else:
            logging.info(f"[Epoch: {epoch}/{cfg['n_epochs']}] Train time: {train_time:.1f}s")
        writer.flush()
    t_end = time.perf_counter()

    # Save final metrics
    metric_summary["train_time_per_epoch"] = total_train_time / cfg["n_epochs"]
    metric_summary["total_time"] = t_end - t_start
    writer.add_hparams(hparam_dict=cfg, metric_dict=metric_summary, run_name=log_dir)
    writer.close()
    logging.info(f"Metric Summary: {metric_summary}")

    # Save the best and final model.
    # Per-epoch checkpoints only exist when saving was enabled, so the copies
    # are skipped otherwise instead of failing on a missing file.
    if cfg["save"]:
        if cfg["validate"] is True:
            copyfile(
                os.path.join(log_dir, f"model_epoch_{metric_summary['best_epoch']}.pth"),
                os.path.join(log_dir, "model_best.pth"),
            )
        copyfile(
            os.path.join(log_dir, f"model_epoch_{cfg['n_epochs']}.pth"),
            os.path.join(log_dir, "model_final.pth"),
        )

    # Final prints
    logging.info(
        f"[Completed] {cfg['n_epochs']} epochs -- time: {t_end - t_start:.1f}s "
        f"(training: {total_train_time:.1f}s, validation: {total_valid_time:.1f}s)",
    )
    logging.info(f"Logs and model was saved at: {log_dir}")
out_dir = "./outputs_base" train_images = sorted( glob.glob(os.path.join(data_root, "imagesTr", "*.nii.gz"))) train_labels = sorted( glob.glob(os.path.join(data_root, "labelsTr", "*.nii.gz"))) data_dicts = [{ "image": image_name, "label": label_name } for image_name, label_name in zip(train_images, train_labels)] train_files, val_files = data_dicts[:-9], data_dicts[-9:] set_determinism(seed=0) train_transforms = Compose([ Range("LoadImage")(LoadImaged(keys=["image", "label"])), Range()(EnsureChannelFirstd(keys=["image", "label"])), Range()(Orientationd(keys=["image", "label"], axcodes="RAS")), Range("Spacing")(Spacingd( keys=["image", "label"], pixdim=(1.5, 1.5, 2.0), mode=("bilinear", "nearest"), )), Range()(ScaleIntensityRanged( keys=["image"], a_min=-57, a_max=164, b_min=0.0, b_max=1.0, clip=True, )),