def test_set_data(self):
    data_list1 = list(range(10))

    transform = Compose([Lambda(func=lambda x: np.array([x * 10])), RandLambda(func=lambda x: x + 1)])

    dataset = CacheDataset(
        data=data_list1,
        transform=transform,
        cache_rate=1.0,
        num_workers=4,
        progress=True,
        copy_cache=not sys.platform == "linux",
    )

    num_workers = 2 if sys.platform == "linux" else 0
    dataloader = DataLoader(dataset=dataset, num_workers=num_workers, batch_size=1)
    for i, d in enumerate(dataloader):
        np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)
    # simulate another epoch, the cache content should not be modified
    for i, d in enumerate(dataloader):
        np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)

    # update the datalist and fill the cache content
    data_list2 = list(range(-10, 0))
    dataset.set_data(data=data_list2)
    # rerun with updated cache content
    for i, d in enumerate(dataloader):
        np.testing.assert_allclose([[data_list2[i] * 10 + 1]], d)

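# A minimal, self-contained sketch (illustrative values only, assuming a recent
# MONAI install) of the caching behaviour exercised above: CacheDataset applies
# the deterministic transforms once at construction time and caches the result,
# while random transforms such as RandLambda still run on every fetch.
import numpy as np
from monai.data import CacheDataset
from monai.transforms import Compose, Lambda, RandLambda

deterministic = Lambda(func=lambda x: np.array([x * 10]))  # runs once, result cached
random_part = RandLambda(func=lambda x: x + 1)  # runs on every __getitem__
ds = CacheDataset(data=list(range(4)), transform=Compose([deterministic, random_part]))
print(ds[0])  # cached np.array([0]) plus 1 from the per-access random transform
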
def __init__(
    self,
    root_dir: str,
    task: str,
    section: str,
    transform: Union[Sequence[Callable], Callable] = (),
    download: bool = False,
    seed: int = 0,
    val_frac: float = 0.2,
    cache_num: int = sys.maxsize,
    cache_rate: float = 1.0,
    num_workers: int = 0,
) -> None:
    if not os.path.isdir(root_dir):
        raise ValueError("Root directory root_dir must be a directory.")
    self.section = section
    self.val_frac = val_frac
    self.set_random_state(seed=seed)
    if task not in self.resource:
        raise ValueError(f"Unsupported task: {task}, available options are: {list(self.resource.keys())}.")
    dataset_dir = os.path.join(root_dir, task)
    tarfile_name = f"{dataset_dir}.tar"
    if download:
        download_and_extract(self.resource[task], tarfile_name, root_dir, self.md5[task])

    if not os.path.exists(dataset_dir):
        raise RuntimeError(
            f"Cannot find dataset directory: {dataset_dir}, please use download=True to download it."
        )
    self.indices: np.ndarray = np.array([])
    data = self._generate_data_list(dataset_dir)
    # as the `release` key has a typo in the Task04 config file, ignore it.
    property_keys = [
        "name",
        "description",
        "reference",
        "licence",
        "tensorImageSize",
        "modality",
        "labels",
        "numTraining",
        "numTest",
    ]
    self._properties = load_decathlon_properties(os.path.join(dataset_dir, "dataset.json"), property_keys)
    if transform == ():
        transform = LoadImaged(["image", "label"])
    CacheDataset.__init__(
        self, data, transform, cache_num=cache_num, cache_rate=cache_rate, num_workers=num_workers
    )

def __init__(
    self,
    root_dir: PathLike,
    section: str,
    transform: Union[Sequence[Callable], Callable] = (),
    download: bool = False,
    seed: int = 0,
    val_frac: float = 0.1,
    test_frac: float = 0.1,
    cache_num: int = sys.maxsize,
    cache_rate: float = 1.0,
    num_workers: Optional[int] = 1,
    progress: bool = True,
    copy_cache: bool = True,
    as_contiguous: bool = True,
) -> None:
    root_dir = Path(root_dir)
    if not root_dir.is_dir():
        raise ValueError("Root directory root_dir must be a directory.")
    self.section = section
    self.val_frac = val_frac
    self.test_frac = test_frac
    self.set_random_state(seed=seed)
    tarfile_name = root_dir / self.compressed_file_name
    dataset_dir = root_dir / self.dataset_folder_name
    self.num_class = 0
    if download:
        download_and_extract(
            url=self.resource,
            filepath=tarfile_name,
            output_dir=root_dir,
            hash_val=self.md5,
            hash_type="md5",
            progress=progress,
        )

    if not dataset_dir.is_dir():
        raise RuntimeError(
            f"Cannot find dataset directory: {dataset_dir}, please use download=True to download it."
        )
    data = self._generate_data_list(dataset_dir)
    if transform == ():
        transform = LoadImaged("image")
    CacheDataset.__init__(
        self,
        data=data,
        transform=transform,
        cache_num=cache_num,
        cache_rate=cache_rate,
        num_workers=num_workers,
        progress=progress,
        copy_cache=copy_cache,
        as_contiguous=as_contiguous,
    )

def test_hash_as_key(self, transform, expected_shape):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    with tempfile.TemporaryDirectory() as tempdir:
        test_data = []
        for i in ["1", "2", "2", "3", "3"]:
            for k in ["image", "label", "extra"]:
                nib.save(test_image, os.path.join(tempdir, f"{k}{i}.nii.gz"))
            test_data.append({k: os.path.join(tempdir, f"{k}{i}.nii.gz") for k in ["image", "label", "extra"]})

        dataset = CacheDataset(data=test_data, transform=transform, cache_num=4, num_workers=2, hash_as_key=True)
        self.assertEqual(len(dataset), 5)
        # ensure no duplicated cache content
        self.assertEqual(len(dataset._cache), 3)
        self.assertEqual(dataset.cache_num, 3)
        data1 = dataset[0]
        data2 = dataset[1]
        data3 = dataset[-1]
        # test slice indices
        data4 = dataset[0:-1]
        self.assertEqual(len(data4), 4)

        if transform is None:
            self.assertEqual(data1["image"], os.path.join(tempdir, "image1.nii.gz"))
            self.assertEqual(data2["label"], os.path.join(tempdir, "label2.nii.gz"))
            self.assertEqual(data3["image"], os.path.join(tempdir, "image3.nii.gz"))
        else:
            self.assertTupleEqual(data1["image"].shape, expected_shape)
            self.assertTupleEqual(data2["label"].shape, expected_shape)
            self.assertTupleEqual(data3["image"].shape, expected_shape)
            for d in data4:
                self.assertTupleEqual(d["image"].shape, expected_shape)

        test_data2 = test_data[:3]
        dataset.set_data(data=test_data2)
        self.assertEqual(len(dataset), 3)
        # ensure no duplicated cache content
        self.assertEqual(len(dataset._cache), 2)
        self.assertEqual(dataset.cache_num, 2)

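# Note on the counts asserted above: with hash_as_key=True, CacheDataset keys its
# cache by a hash of each data item, so the five entries (only three distinct file
# groups: "1", "2", "3") deduplicate to three cached items, and after set_data()
# the shortened list ("1", "2", "2") deduplicates to two.
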
def test_shape(self, transform, expected_shape):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    with tempfile.TemporaryDirectory() as tempdir:
        test_data = []
        for i in ["1", "2"]:
            for k in ["image", "label", "extra"]:
                nib.save(test_image, os.path.join(tempdir, f"{k}{i}.nii.gz"))
            test_data.append({k: os.path.join(tempdir, f"{k}{i}.nii.gz") for k in ["image", "label", "extra"]})

        dataset = CacheDataset(data=test_data, transform=transform, cache_rate=0.5, as_contiguous=True)
        data1 = dataset[0]
        data2 = dataset[1]
        data3 = dataset[0:-1]
        data4 = dataset[-1]
        self.assertEqual(len(data3), 1)

        if transform is None:
            # check without providing a transform
            dataset2 = CacheDataset(data=test_data, cache_rate=0.5, as_contiguous=True)
            for k in ["image", "label", "extra"]:
                self.assertEqual(dataset[0][k], dataset2[0][k])

        if transform is None:
            self.assertEqual(data1["image"], os.path.join(tempdir, "image1.nii.gz"))
            self.assertEqual(data2["label"], os.path.join(tempdir, "label2.nii.gz"))
            self.assertEqual(data4["image"], os.path.join(tempdir, "image2.nii.gz"))
        else:
            self.assertTupleEqual(data1["image"].shape, expected_shape)
            self.assertTupleEqual(data1["label"].shape, expected_shape)
            self.assertTupleEqual(data1["extra"].shape, expected_shape)
            self.assertTupleEqual(data2["image"].shape, expected_shape)
            self.assertTupleEqual(data2["label"].shape, expected_shape)
            self.assertTupleEqual(data2["extra"].shape, expected_shape)
            for d in data3:
                self.assertTupleEqual(d["image"].shape, expected_shape)

def test_shape(self, expected_shape):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    tempdir = tempfile.mkdtemp()
    nib.save(test_image, os.path.join(tempdir, 'test_image1.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_label1.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_extra1.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_image2.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_label2.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_extra2.nii.gz'))
    test_data = [
        {
            'image': os.path.join(tempdir, 'test_image1.nii.gz'),
            'label': os.path.join(tempdir, 'test_label1.nii.gz'),
            'extra': os.path.join(tempdir, 'test_extra1.nii.gz'),
        },
        {
            'image': os.path.join(tempdir, 'test_image2.nii.gz'),
            'label': os.path.join(tempdir, 'test_label2.nii.gz'),
            'extra': os.path.join(tempdir, 'test_extra2.nii.gz'),
        },
    ]
    dataset = CacheDataset(
        data=test_data,
        transform=Compose([LoadNiftid(keys=['image', 'label', 'extra'])]),
        cache_rate=0.5,
    )
    data1 = dataset[0]
    data2 = dataset[1]
    shutil.rmtree(tempdir)
    self.assertTupleEqual(data1['image'].shape, expected_shape)
    self.assertTupleEqual(data1['label'].shape, expected_shape)
    self.assertTupleEqual(data1['extra'].shape, expected_shape)
    self.assertTupleEqual(data2['image'].shape, expected_shape)
    self.assertTupleEqual(data2['label'].shape, expected_shape)
    self.assertTupleEqual(data2['extra'].shape, expected_shape)

def _get_loader(self, folders):
    images = []
    segs = []
    for folder in folders:
        images += glob(os.path.join(folder, "*_im.nii.gz"))
        segs += glob(os.path.join(folder, "*_seg.nii.gz"))
    images = sorted(images, key=os.path.basename)
    segs = sorted(segs, key=os.path.basename)
    files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)]
    transforms = Compose(
        [
            LoadImaged(keys=["img", "seg"]),
            AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
            ScaleIntensityd(keys="img"),
            ToTensord(keys=["img", "seg"]),
        ]
    )
    ds = CacheDataset(data=files, transform=transforms)
    loader = DataLoader(ds, batch_size=1, num_workers=4, collate_fn=list_data_collate)
    return loader

def test_shape(self, expected_shape):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    tempdir = tempfile.mkdtemp()
    nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_extra1.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_image2.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_label2.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_extra2.nii.gz"))
    test_data = [
        {
            "image": os.path.join(tempdir, "test_image1.nii.gz"),
            "label": os.path.join(tempdir, "test_label1.nii.gz"),
            "extra": os.path.join(tempdir, "test_extra1.nii.gz"),
        },
        {
            "image": os.path.join(tempdir, "test_image2.nii.gz"),
            "label": os.path.join(tempdir, "test_label2.nii.gz"),
            "extra": os.path.join(tempdir, "test_extra2.nii.gz"),
        },
    ]
    dataset = CacheDataset(
        data=test_data,
        transform=Compose([LoadNiftid(keys=["image", "label", "extra"])]),
        cache_rate=0.5,
    )
    data1 = dataset[0]
    data2 = dataset[1]
    shutil.rmtree(tempdir)
    self.assertTupleEqual(data1["image"].shape, expected_shape)
    self.assertTupleEqual(data1["label"].shape, expected_shape)
    self.assertTupleEqual(data1["extra"].shape, expected_shape)
    self.assertTupleEqual(data2["image"].shape, expected_shape)
    self.assertTupleEqual(data2["label"].shape, expected_shape)
    self.assertTupleEqual(data2["extra"].shape, expected_shape)

def test_decollation(self, batch_size=2, num_workers=2):
    im = create_test_image_2d(100, 101)[0]
    data = [{"image": make_nifti_image(im) if has_nib else im} for _ in range(6)]

    transforms = Compose(
        [
            AddChanneld("image"),
            SpatialPadd("image", 150),
            RandFlipd("image", prob=1.0, spatial_axis=1),
            ToTensord("image"),
        ]
    )
    # If nibabel present, read from disk
    if has_nib:
        transforms = Compose([LoadImaged("image"), transforms])

    dataset = CacheDataset(data, transforms, progress=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    for b, batch_data in enumerate(loader):
        decollated_1 = decollate_batch(batch_data)
        decollated_2 = Decollated()(batch_data)

        for decollated in [decollated_1, decollated_2]:
            for i, d in enumerate(decollated):
                self.check_match(dataset[b * batch_size + i], d)

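# A short, self-contained sketch (names here are illustrative) of the behaviour
# under test: decollate_batch turns a collated batch dict of stacked tensors back
# into a list of per-sample dicts, so postprocessing and inverse transforms can
# run item by item.
import torch
from monai.data import decollate_batch

batch = {"image": torch.zeros(2, 1, 10, 10), "id": ["a", "b"]}
samples = decollate_batch(batch)
assert len(samples) == 2
assert samples[0]["image"].shape == (1, 10, 10) and samples[0]["id"] == "a"
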
def test_pad_collation(self, t_type, collate_method, transform):
    if t_type == dict:
        dataset = CacheDataset(self.dict_data, transform, progress=False)
    else:
        dataset = _Dataset(self.list_data, self.list_labels, transform)

    # Default collation should raise an error
    loader_fail = DataLoader(dataset, batch_size=10)
    with self.assertRaises(RuntimeError):
        for _ in loader_fail:
            pass

    # Padded collation shouldn't
    loader = DataLoader(dataset, batch_size=10, collate_fn=collate_method)
    # check collation in forward direction
    for data in loader:
        if t_type == dict:
            shapes = []
            decollated_data = decollate_batch(data)
            for d in decollated_data:
                output = PadListDataCollate.inverse(d)
                shapes.append(output["image"].shape)
            self.assertTrue(len(set(shapes)) > 1)  # inverted shapes must be different because of random xforms

def test_collation(self, _, transform, collate_fn, ndim):
    data = self.data_3d if ndim == 3 else self.data_2d
    if collate_fn:
        modified_transform = transform
    else:
        modified_transform = Compose([transform, ResizeWithPadOrCropd(KEYS, 100), ToTensord(KEYS)])

    # num workers = 0 for mac or gpu transforms
    num_workers = 0 if sys.platform != "linux" or torch.cuda.is_available() else 2

    dataset = CacheDataset(data, transform=modified_transform, progress=False)
    loader = DataLoader(dataset, num_workers, batch_size=self.batch_size, collate_fn=collate_fn)

    for item in loader:
        np.testing.assert_array_equal(
            item["image_transforms"][0]["do_transforms"], item["label_transforms"][0]["do_transforms"]
        )

def test_decollation(self, *transforms):
    batch_size = 2
    num_workers = 2

    t_compose = Compose([AddChanneld(KEYS), Compose(transforms), ToTensord(KEYS)])
    # If nibabel present, read from disk
    if has_nib:
        t_compose = Compose([LoadImaged("image"), t_compose])

    dataset = CacheDataset(self.data, t_compose, progress=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    for b, batch_data in enumerate(loader):
        decollated_1 = decollate_batch(batch_data)
        decollated_2 = Decollated()(batch_data)

        for decollated in [decollated_1, decollated_2]:
            for i, d in enumerate(decollated):
                self.check_match(dataset[b * batch_size + i], d)

def test_values(self):
    datalist = [
        {"image": "spleen_19.nii.gz", "label": "spleen_label_19.nii.gz"},
        {"image": "spleen_31.nii.gz", "label": "spleen_label_31.nii.gz"},
    ]
    transform = Compose(
        [
            DataStatsd(keys=["image", "label"], data_shape=False, value_range=False, data_value=True),
            SimulateDelayd(keys=["image", "label"], delay_time=0.1),
        ]
    )
    dataset = CacheDataset(data=datalist, transform=transform, cache_rate=0.5, cache_num=1)
    dataloader = DataLoader(dataset=dataset, batch_size=2, num_workers=2)
    for d in dataloader:
        self.assertEqual(d["image"][0], "spleen_19.nii.gz")
        self.assertEqual(d["image"][1], "spleen_31.nii.gz")
        self.assertEqual(d["label"][0], "spleen_label_19.nii.gz")
        self.assertEqual(d["label"][1], "spleen_label_31.nii.gz")

def test_epistemic_scoring(self):
    input_size = (20, 20, 20)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    keys = ["image", "label"]
    num_training_ims = 10
    train_data = self.get_data(num_training_ims, input_size)
    test_data = self.get_data(1, input_size)

    transforms = Compose([AddChanneld(keys), CropForegroundd(keys, source_key="image"), DivisiblePadd(keys, 4)])
    infer_transforms = Compose([AddChannel(), CropForeground(), DivisiblePad(4)])

    train_ds = CacheDataset(train_data, transforms)
    # output might be different size, so pad so that they match
    train_loader = DataLoader(train_ds, batch_size=2, collate_fn=pad_list_data_collate)

    model = UNet(3, 1, 1, channels=(6, 6), strides=(2, 2)).to(device)
    loss_function = DiceLoss(sigmoid=True)
    optimizer = torch.optim.Adam(model.parameters(), 1e-3)

    num_epochs = 10
    for _ in trange(num_epochs):
        epoch_loss = 0

        for batch_data in train_loader:
            inputs, labels = batch_data["image"].to(device), batch_data["label"].to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        epoch_loss /= len(train_loader)

    entropy_score = EpistemicScoring(
        model=model, transforms=infer_transforms, roi_size=[20, 20, 20], num_samples=10
    )
    # Call Individual Infer from Epistemic Scoring
    ip_stack = [test_data["image"], test_data["image"], test_data["image"]]
    ip_stack = np.array(ip_stack)
    score_3d = entropy_score.entropy_3d_volume(ip_stack)
    score_3d_sum = np.sum(score_3d)
    # Call Entropy Metric from Epistemic Scoring
    self.assertEqual(score_3d.shape, input_size)
    self.assertIsInstance(score_3d_sum, np.float32)
    self.assertGreater(score_3d_sum, 3.0)

def test_decollation_dict(self, *transforms):
    t_compose = Compose([AddChanneld(KEYS), Compose(transforms), ToTensord(KEYS)])
    # If nibabel present, read from disk
    if has_nib:
        t_compose = Compose([LoadImaged("image"), t_compose])
    dataset = CacheDataset(self.data_dict, t_compose, progress=False)
    self.check_decollate(dataset=dataset)

def test_inverse_inferred_seg(self):
    test_data = []
    for _ in range(20):
        image, label = create_test_image_2d(100, 101)
        test_data.append({"image": image, "label": label.astype(np.float32)})

    batch_size = 10
    # num workers = 0 for mac
    num_workers = 2 if sys.platform != "darwin" else 0
    transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), CenterSpatialCropd(KEYS, (110, 99))])
    num_invertible_transforms = sum(1 for i in transforms.transforms if isinstance(i, InvertibleTransform))

    dataset = CacheDataset(test_data, transform=transforms, progress=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = UNet(dimensions=2, in_channels=1, out_channels=1, channels=(2, 4), strides=(2,)).to(device)

    data = first(loader)
    labels = data["label"].to(device)
    segs = model(labels).detach().cpu()
    label_transform_key = "label" + InverseKeys.KEY_SUFFIX.value
    segs_dict = {"label": segs, label_transform_key: data[label_transform_key]}

    segs_dict_decollated = decollate_batch(segs_dict)
    # inverse of individual segmentation
    seg_dict = first(segs_dict_decollated)
    with allow_missing_keys_mode(transforms):
        inv_seg = transforms.inverse(seg_dict)["label"]
    self.assertEqual(len(data["label_transforms"]), num_invertible_transforms)
    self.assertEqual(len(seg_dict["label_transforms"]), num_invertible_transforms)
    self.assertEqual(inv_seg.shape[1:], test_data[0]["label"].shape)

def test_inverse_inferred_seg(self, extra_transform):
    test_data = []
    for _ in range(20):
        image, label = create_test_image_2d(100, 101)
        test_data.append({"image": image, "label": label.astype(np.float32)})

    batch_size = 10
    # num workers = 0 for mac
    num_workers = 2 if sys.platform == "linux" else 0
    transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), extra_transform])

    dataset = CacheDataset(test_data, transform=transforms, progress=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = UNet(spatial_dims=2, in_channels=1, out_channels=1, channels=(2, 4), strides=(1,)).to(device)

    data = first(loader)
    self.assertEqual(data["image"].shape[0], batch_size * NUM_SAMPLES)

    labels = data["label"].to(device)
    self.assertIsInstance(labels, MetaTensor)
    segs = model(labels).detach().cpu()
    segs_decollated = decollate_batch(segs)
    self.assertIsInstance(segs_decollated[0], MetaTensor)
    # inverse of individual segmentation
    seg_metatensor = first(segs_decollated)
    # test to convert interpolation mode for 1 data of model output batch
    convert_applied_interp_mode(seg_metatensor.applied_operations, mode="nearest", align_corners=None)
    # manually invert the last crop samples
    xform = seg_metatensor.applied_operations.pop(-1)
    shape_before_extra_xform = xform["orig_size"]
    resizer = ResizeWithPadOrCrop(spatial_size=shape_before_extra_xform)
    with resizer.trace_transform(False):
        seg_metatensor = resizer(seg_metatensor)

    with allow_missing_keys_mode(transforms):
        inv_seg = transforms.inverse({"label": seg_metatensor})["label"]
    self.assertEqual(inv_seg.shape[1:], test_data[0]["label"].shape)

def __init__(
    self,
    root_dir: str,
    section: str,
    transform: Union[Sequence[Callable], Callable] = (),
    download: bool = False,
    seed: int = 0,
    val_frac: float = 0.1,
    test_frac: float = 0.1,
    cache_num: int = sys.maxsize,
    cache_rate: float = 1.0,
    num_workers: int = 0,
) -> None:
    if not os.path.isdir(root_dir):
        raise ValueError("Root directory root_dir must be a directory.")
    self.section = section
    self.val_frac = val_frac
    self.test_frac = test_frac
    self.set_random_state(seed=seed)
    tarfile_name = os.path.join(root_dir, self.compressed_file_name)
    dataset_dir = os.path.join(root_dir, self.dataset_folder_name)
    self.num_class = 0
    if download:
        download_and_extract(self.resource, tarfile_name, root_dir, self.md5)

    if not os.path.exists(dataset_dir):
        raise RuntimeError(
            f"Cannot find dataset directory: {dataset_dir}, please use download=True to download it."
        )
    data = self._generate_data_list(dataset_dir)
    if transform == ():
        transform = LoadImaged("image")
    CacheDataset.__init__(
        self, data, transform, cache_num=cache_num, cache_rate=cache_rate, num_workers=num_workers
    )

def _get_predictions_iterator(self, segs):
    files = [{"seg": seg} for seg in segs]
    transforms = Compose(
        [
            LoadImaged(keys=["seg"]),
            AsChannelFirstd(keys=["seg"], channel_dim=-1),
            ToTensord(keys=["seg"]),
        ]
    )
    ds = CacheDataset(data=files, transform=transforms)
    loader = DataLoader(ds, batch_size=1, num_workers=4, collate_fn=list_data_collate)
    for data in loader:
        yield (data["seg"], data["seg_meta_dict"])

def test_value(self):
    device = "cuda:0"
    data = [{"img": torch.tensor(i)} for i in range(4)]
    dataset = CacheDataset(
        data=data, transform=ToDeviced(keys="img", device=device, non_blocking=True), cache_rate=1.0
    )
    dataloader = ThreadDataLoader(dataset=dataset, num_workers=0, batch_size=1)
    for i, d in enumerate(dataloader):
        torch.testing.assert_allclose(d["img"], torch.tensor([i], device=device))

def test_collation(self, _, transform, collate_fn):
    if collate_fn:
        modified_transform = transform
    else:
        modified_transform = Compose([transform, ResizeWithPadOrCropd(KEYS, [100, 100, 100])])

    # num workers = 0 for mac
    num_workers = 2 if sys.platform != "darwin" else 0

    dataset = CacheDataset(self.data, transform=modified_transform, progress=False)
    loader = DataLoader(dataset, num_workers, batch_size=self.batch_size, collate_fn=collate_fn)

    for _ in loader:
        pass

def test_duplicate_transforms(self):
    im, _ = create_test_image_2d(128, 128, num_seg_classes=1, channel_dim=0)
    data = [{"img": im} for _ in range(2)]

    # at least 1 deterministic followed by at least 1 random
    transform = Compose([Spacingd("img", pixdim=(1, 1)), RandAffined("img", prob=1.0)])

    # cachedataset and data loader w persistent_workers
    train_ds = CacheDataset(data, transform, cache_num=1)
    train_loader = DataLoader(train_ds, num_workers=2, persistent_workers=True)

    b1 = next(iter(train_loader))
    b2 = next(iter(train_loader))

    self.assertEqual(len(b1["img_transforms"]), len(b2["img_transforms"]))

def test_duplicate_transforms(self):
    data = [{"img": create_test_image_2d(128, 128, num_seg_classes=1, channel_dim=0)[0]} for _ in range(2)]

    # at least 1 deterministic followed by at least 1 random
    transform = Compose([Spacingd("img", pixdim=(1, 1)), RandAffined("img", prob=1.0)])

    # cachedataset and data loader w persistent_workers
    train_ds = CacheDataset(data, transform, cache_num=1)
    # num_workers > 1 may fail randomly with 21.09 on A100 test node
    # https://github.com/Project-MONAI/MONAI/issues/3283
    train_loader = DataLoader(train_ds, num_workers=1, persistent_workers=True)

    b1 = next(iter(train_loader))
    b2 = next(iter(train_loader))

    self.assertEqual(len(b1["img"].applied_operations), len(b2["img"].applied_operations))

def test_shape(self, transform, expected_shape):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    with tempfile.TemporaryDirectory() as tempdir:
        nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz"))
        nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz"))
        nib.save(test_image, os.path.join(tempdir, "test_extra1.nii.gz"))
        nib.save(test_image, os.path.join(tempdir, "test_image2.nii.gz"))
        nib.save(test_image, os.path.join(tempdir, "test_label2.nii.gz"))
        nib.save(test_image, os.path.join(tempdir, "test_extra2.nii.gz"))
        test_data = [
            {
                "image": os.path.join(tempdir, "test_image1.nii.gz"),
                "label": os.path.join(tempdir, "test_label1.nii.gz"),
                "extra": os.path.join(tempdir, "test_extra1.nii.gz"),
            },
            {
                "image": os.path.join(tempdir, "test_image2.nii.gz"),
                "label": os.path.join(tempdir, "test_label2.nii.gz"),
                "extra": os.path.join(tempdir, "test_extra2.nii.gz"),
            },
        ]
        dataset = CacheDataset(data=test_data, transform=transform, cache_rate=0.5)
        data1 = dataset[0]
        data2 = dataset[1]
        data3 = dataset[0:-1]
        data4 = dataset[-1]
        self.assertEqual(len(data3), 1)

        if transform is None:
            self.assertEqual(data1["image"], os.path.join(tempdir, "test_image1.nii.gz"))
            self.assertEqual(data2["label"], os.path.join(tempdir, "test_label2.nii.gz"))
            self.assertEqual(data4["image"], os.path.join(tempdir, "test_image2.nii.gz"))
        else:
            self.assertTupleEqual(data1["image"].shape, expected_shape)
            self.assertTupleEqual(data1["label"].shape, expected_shape)
            self.assertTupleEqual(data1["extra"].shape, expected_shape)
            self.assertTupleEqual(data2["image"].shape, expected_shape)
            self.assertTupleEqual(data2["label"].shape, expected_shape)
            self.assertTupleEqual(data2["extra"].shape, expected_shape)
            for d in data3:
                self.assertTupleEqual(d["image"].shape, expected_shape)

def test_transforms(self, case_id):
    set_determinism(2022)
    config = ConfigParser()
    config.read_config(TEST_CASES)
    config["input_keys"] = keys
    test_case = config.get_parsed_content(id=case_id, instantiate=True)  # transform instance

    dataset = CacheDataset(self.files, transform=test_case)
    loader = DataLoader(dataset, batch_size=3, shuffle=True)
    for x in loader:
        self.assertIsInstance(x[keys[0]], MetaTensor)
        self.assertIsInstance(x[keys[1]], MetaTensor)
        out = decollate_batch(x)  # decollate every batch should work

    # test forward patches
    loaded = out[0]
    self.assertEqual(len(loaded), len(keys))
    img, seg = loaded[keys[0]], loaded[keys[1]]
    expected = config.get_parsed_content(id=f"{case_id}_answer", instantiate=True)  # expected results
    self.assertEqual(expected["load_shape"], list(x[keys[0]].shape))
    assert_allclose(expected["affine"], img.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
    assert_allclose(expected["affine"], seg.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
    test_cls = [type(x).__name__ for x in test_case.transforms]
    tracked_cls = [x[TraceKeys.CLASS_NAME] for x in img.applied_operations]
    # tracked items should be no more than the compose items.
    self.assertTrue(len(tracked_cls) <= len(test_cls))
    with tempfile.TemporaryDirectory() as tempdir:  # test writer
        SaveImageD(keys, resample=False, output_dir=tempdir, output_postfix=case_id)(loaded)

    # test inverse
    inv = InvertD(keys, orig_keys=keys, transform=test_case, nearest_interp=True)
    out = inv(loaded)
    img, seg = out[keys[0]], out[keys[1]]
    assert_allclose(expected["inv_affine"], img.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
    assert_allclose(expected["inv_affine"], seg.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
    self.assertFalse(img.applied_operations)
    self.assertFalse(seg.applied_operations)
    assert_allclose(expected["inv_shape"], img.shape, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
    assert_allclose(expected["inv_shape"], seg.shape, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)

    with tempfile.TemporaryDirectory() as tempdir:  # test writer
        SaveImageD(keys, resample=False, output_dir=tempdir, output_postfix=case_id)(out)
        seg_file = os.path.join(tempdir, key_1, f"{key_1}_{case_id}.nii.gz")
        segout = nib.load(seg_file).get_fdata()
        segin = nib.load(FILE_PATH_1).get_fdata()
        ndiff = np.sum(np.abs(segout - segin) > 0)
        total = np.prod(segout.shape)
    self.assertTrue(ndiff / total < 0.4, f"{ndiff / total}")

def test_inverse_inferred_seg(self, extra_transform):
    test_data = []
    for _ in range(20):
        image, label = create_test_image_2d(100, 101)
        test_data.append({"image": image, "label": label.astype(np.float32)})

    batch_size = 10
    # num workers = 0 for mac
    num_workers = 2 if sys.platform == "linux" else 0
    transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), extra_transform])
    num_invertible_transforms = sum(1 for i in transforms.transforms if isinstance(i, InvertibleTransform))

    dataset = CacheDataset(test_data, transform=transforms, progress=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = UNet(spatial_dims=2, in_channels=1, out_channels=1, channels=(2, 4), strides=(2,)).to(device)

    data = first(loader)
    self.assertEqual(len(data["label_transforms"]), num_invertible_transforms)
    self.assertEqual(data["image"].shape[0], batch_size * NUM_SAMPLES)

    labels = data["label"].to(device)
    segs = model(labels).detach().cpu()
    label_transform_key = "label" + InverseKeys.KEY_SUFFIX
    segs_dict = {"label": segs, label_transform_key: data[label_transform_key]}

    segs_dict_decollated = decollate_batch(segs_dict)
    # inverse of individual segmentation
    seg_dict = first(segs_dict_decollated)
    # test to convert interpolation mode for 1 data of model output batch
    convert_inverse_interp_mode(seg_dict, mode="nearest", align_corners=None)

    with allow_missing_keys_mode(transforms):
        inv_seg = transforms.inverse(seg_dict)["label"]
    self.assertEqual(len(data["label_transforms"]), num_invertible_transforms)
    self.assertEqual(len(seg_dict["label_transforms"]), num_invertible_transforms)
    self.assertEqual(inv_seg.shape[1:], test_data[0]["label"].shape)

    # Inverse of batch
    batch_inverter = BatchInverseTransform(transforms, loader, collate_fn=no_collation, detach=True)
    with allow_missing_keys_mode(transforms):
        inv_batch = batch_inverter(segs_dict)
    self.assertEqual(inv_batch[0]["label"].shape[1:], test_data[0]["label"].shape)

def test_shape(self, num_workers, dataset_size, transform):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    tempdir = tempfile.mkdtemp()
    nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz"))
    nib.save(test_image, os.path.join(tempdir, "test_extra1.nii.gz"))
    test_data = [
        {
            "image": os.path.join(tempdir, "test_image1.nii.gz"),
            "label": os.path.join(tempdir, "test_label1.nii.gz"),
            "extra": os.path.join(tempdir, "test_extra1.nii.gz"),
        }
    ] * dataset_size
    dataset = CacheDataset(data=test_data, transform=transform, cache_rate=1, num_workers=num_workers)
    shutil.rmtree(tempdir)
    self.assertEqual(len(dataset._cache), dataset.cache_num)
    for i in range(dataset.cache_num):
        self.assertIsNotNone(dataset._cache[i])

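# The assertions above rely on CacheDataset's documented sizing rule: the number
# of cached items is min(cache_num, int(len(data) * cache_rate), len(data)), so
# with cache_rate=1 every cache slot should end up filled.
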
def test_pad_collation(self, t_type, transform):
    if t_type == dict:
        dataset = CacheDataset(self.dict_data, transform, progress=False)
    else:
        dataset = _Dataset(self.list_data, self.list_labels, transform)

    # Default collation should raise an error
    loader_fail = DataLoader(dataset, batch_size=10)
    with self.assertRaises(RuntimeError):
        for _ in loader_fail:
            pass

    # Padded collation shouldn't
    loader = DataLoader(dataset, batch_size=2, collate_fn=pad_list_data_collate)
    for _ in loader:
        pass

def test_pad_collation(self, t_type, collate_method, transform):
    if t_type == dict:
        dataset = CacheDataset(self.dict_data, transform, progress=False)
    else:
        dataset = _Dataset(self.list_data, self.list_labels, transform)

    # Default collation should raise an error
    loader_fail = DataLoader(dataset, batch_size=10)
    with self.assertRaises(RuntimeError):
        for _ in loader_fail:
            pass

    # Padded collation shouldn't
    loader = DataLoader(dataset, batch_size=10, collate_fn=collate_method)
    # check collation in forward direction
    for data in loader:
        if t_type == dict:
            decollated_data = decollate_batch(data)
            for d in decollated_data:
                PadListDataCollate.inverse(d)

def test_shape(self, num_workers, dataset_size):
    test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
    tempdir = tempfile.mkdtemp()
    nib.save(test_image, os.path.join(tempdir, 'test_image1.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_label1.nii.gz'))
    nib.save(test_image, os.path.join(tempdir, 'test_extra1.nii.gz'))
    test_data = [
        {
            'image': os.path.join(tempdir, 'test_image1.nii.gz'),
            'label': os.path.join(tempdir, 'test_label1.nii.gz'),
            'extra': os.path.join(tempdir, 'test_extra1.nii.gz'),
        }
    ] * dataset_size
    dataset = CacheDataset(
        data=test_data,
        transform=Compose([LoadNiftid(keys=['image', 'label', 'extra'])]),
        cache_rate=1,
        num_workers=num_workers,
    )
    shutil.rmtree(tempdir)
    self.assertEqual(len(dataset._cache), dataset.cache_num)
    for i in range(dataset.cache_num):
        self.assertIsNotNone(dataset._cache[i])