def upsample_mags(
    path: Path,
    layer_name: Optional[str] = None,
    from_mag: Optional[Mag] = None,
    target_mag: Mag = Mag(1),
    buffer_shape: Optional[Vec3Int] = None,
    compress: bool = True,
    args: Optional[Namespace] = None,
    sampling_mode: Union[str, SamplingModes] = SamplingModes.ANISOTROPIC,
) -> None:
    assert (layer_name and from_mag) or (not layer_name and not from_mag), (
        "You provided only one of the parameters layer_name and from_mag, "
        "but either both or neither must be set. If you do not provide them, "
        "the path argument must point to the mag and layer to upsample "
        "(e.g. dataset/color/1)."
    )
    if not layer_name or not from_mag:
        layer_name = path.parent.name
        from_mag = Mag(path.name)
        path = path.parent.parent

    Dataset.open(path).get_layer(layer_name).upsample(
        from_mag=from_mag,
        finest_mag=target_mag,
        compress=compress,
        sampling_mode=sampling_mode,
        buffer_shape=buffer_shape,
        args=args,
    )
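# Hedged usage sketch for upsample_mags; "my_dataset" and its layout are
# hypothetical placeholders, not fixtures from this repository:
#
#   # Point directly at the mag directory (layer and mag are inferred) ...
#   upsample_mags(Path("my_dataset/color/2"), target_mag=Mag(1))
#
#   # ... or spell out layer and source mag explicitly:
#   upsample_mags(
#       Path("my_dataset"), layer_name="color", from_mag=Mag(2), target_mag=Mag(1)
#   )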
def test_default_anisotropic_voxel_size(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "default_anisotropic_voxel_size", voxel_size=(85, 85, 346))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1)
    mag.write(data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8))

    layer.downsample(Mag(1), None, "median", True)

    assert sorted(layer.mags.keys()) == [Mag("1"), Mag("2-2-1"), Mag("4-4-1")]
def detect_mag_path(
    dataset_path: Path, layer: str, mag: Mag = Mag(1)
) -> Optional[Path]:
    layer_path = dataset_path / layer / str(mag)
    if layer_path.exists():
        return layer_path
    layer_path = dataset_path / layer / mag.to_long_layer_name()
    if layer_path.exists():
        return layer_path
    return None
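# Hedged usage sketch: detect_mag_path resolves both the short ("2") and the
# long ("2-2-2") directory naming scheme for a mag. The dataset layout below
# is a hypothetical example:
#
#   detect_mag_path(Path("my_dataset"), "color", Mag(2))
#   # -> my_dataset/color/2 if that directory exists,
#   #    otherwise my_dataset/color/2-2-2, otherwise None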
def test_in_mag() -> None:
    with pytest.raises(AssertionError):
        # topleft (1, 2, 3) is not aligned with Mag(2)
        BoundingBox((1, 2, 3), (9, 9, 9)).in_mag(Mag(2))

    with pytest.raises(AssertionError):
        # size (9, 9, 9) is not aligned with Mag(2)
        BoundingBox((2, 2, 2), (9, 9, 9)).in_mag(Mag(2))

    assert BoundingBox((2, 2, 2), (10, 10, 10)).in_mag(Mag(2)) == BoundingBox(
        topleft=(1, 1, 1), size=(5, 5, 5)
    )
def test_downsample_with_invalid_mag_list(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_mag_list", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1)
    mag.write(data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8))

    with pytest.raises(AssertionError):
        layer.downsample_mag_list(
            from_mag=Mag(1),
            target_mags=[Mag(1), Mag([1, 1, 2]), Mag([2, 2, 1]), Mag(2)],
        )
def test_downsample_mag_list(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_mag_list", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1)
    mag.write(data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8))

    target_mags = [Mag([4, 4, 8]), Mag(2), Mag([32, 32, 8]), Mag(32)]  # unsorted list

    layer.downsample_mag_list(from_mag=Mag(1), target_mags=target_mags)

    for m in target_mags:
        assert m in layer.mags
def test_default_parameter(tmp_path: Path) -> None:
    target_path = tmp_path / "downsample_default"
    ds = Dataset(target_path, voxel_size=(1, 1, 1))
    layer = ds.add_layer(
        "color", COLOR_CATEGORY, dtype_per_channel="uint8", num_channels=3
    )
    mag = layer.add_mag("2")
    mag.write(data=(np.random.rand(3, 10, 20, 30) * 255).astype(np.uint8))
    layer.downsample()

    # The max_mag is Mag(4) in this case (see test_default_max_mag)
    assert sorted(layer.mags.keys()) == [Mag("2"), Mag("4")]
def test_upsampling(tmp_path: Path) -> None:
    ds = Dataset(tmp_path, voxel_size=(1, 1, 1))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag([4, 4, 2])
    mag.write(
        absolute_offset=(10 * 4, 20 * 4, 40 * 2),
        data=(np.random.rand(46, 45, 27) * 255).astype(np.uint8),
    )
    layer.upsample(
        from_mag=Mag([4, 4, 2]),
        finest_mag=Mag(1),
        compress=False,
        sampling_mode=SamplingModes.ANISOTROPIC,
        buffer_edge_len=64,
        args=None,
    )
def cube_with_args(args: Namespace) -> None:
    if args.isotropic is not None:
        raise DeprecationWarning(
            "The flag 'isotropic' is deprecated. Consider using '--sampling_mode isotropic' instead."
        )

    auto_detect_and_run_conversion(args)

    layer_path_to_mags: Dict[Path, List[Mag]] = detect_present_mags(args.target_path)

    if not args.no_compress:
        for layer_path, mags in layer_path_to_mags.items():
            layer_name = layer_path.name
            for mag in mags:
                compress_mag_inplace(args.target_path, layer_name, mag, args)

    for layer_path, mags in layer_path_to_mags.items():
        layer_name = layer_path.name
        mags.sort()
        downsample_mags(
            path=args.target_path,
            layer_name=layer_name,
            from_mag=mags[-1],
            max_mag=None if args.max_mag is None else Mag(args.max_mag),
            interpolation_mode="default",
            compress=not args.no_compress,
            sampling_mode=args.sampling_mode,
            args=args,
        )

    refresh_metadata(args.target_path)
def detect_segmentation_layer(
    dataset_path: Path,
    layer_name: str,
    max_id: int,
    compute_max_id: bool = False,
    exact_bounding_box: Optional[dict] = None,
) -> dict:
    layer_info = detect_standard_layer(
        dataset_path, layer_name, exact_bounding_box, category="segmentation"
    )
    layer_info["mappings"] = detect_mappings(dataset_path, layer_name)
    layer_info["largestSegmentId"] = max_id

    if compute_max_id:
        logging.info("Computing max id of layer={}".format(layer_name))
        # Computing the current largest segment id
        # This may take very long due to IO load
        layer_path = str(detect_mag_path(dataset_path, layer_name, Mag(1)))
        with wkw.Dataset.open(layer_path) as dataset:
            bbox = layer_info["boundingBox"]
            layer_info["largestSegmentId"] = int(
                np.max(
                    dataset.read(
                        bbox["topLeft"], [bbox["width"], bbox["height"], bbox["depth"]]
                    )
                )
            )
            logging.info(
                "Max id of layer={} is {}".format(
                    layer_name, layer_info["largestSegmentId"]
                )
            )

    return layer_info
def detect_bbox(dataset_path: Path, layer: str, mag: Mag = Mag(1)) -> Optional[dict]:
    # Detect the coarse bounding box of a dataset by iterating
    # over the WKW cubes
    layer_path = detect_mag_path(dataset_path, layer, mag)
    if layer_path is None:
        return None

    cubes_list = [parse_cube_file_name(f) for f in layer_path.rglob("*/*/*.wkw")]
    if len(cubes_list) == 0:
        return None

    xs, ys, zs = list(zip(*cubes_list))

    min_x, min_y, min_z = min(xs), min(ys), min(zs)
    max_x, max_y, max_z = max(xs), max(ys), max(zs)

    cubeLength = detect_cubeLength(dataset_path, layer, mag)
    if cubeLength is None:
        return None

    return {
        "topLeft": [min_x * cubeLength, min_y * cubeLength, min_z * cubeLength],
        "width": (1 + max_x - min_x) * cubeLength,
        "height": (1 + max_y - min_y) * cubeLength,
        "depth": (1 + max_z - min_z) * cubeLength,
    }
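# Hedged worked example for detect_bbox (the cube indices are hypothetical):
# with cube indices spanning x in [2, 4], y in [1, 1], z in [0, 3] and a cube
# length of 1024, the result is
#
#   topLeft = [2 * 1024, 1 * 1024, 0 * 1024] = [2048, 1024, 0]
#   width   = (1 + 4 - 2) * 1024 = 3072
#   height  = (1 + 1 - 1) * 1024 = 1024
#   depth   = (1 + 3 - 0) * 1024 = 4096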
def main(args: argparse.Namespace) -> None:
    source_path = args.source_path

    if source_path.is_dir():
        logger.error("source_path is not a file")
        return

    executor_args = get_executor_args(args)

    mag_view = convert_raw(
        source_path,
        args.target_path,
        args.layer_name,
        args.input_dtype,
        args.shape,
        args.data_format,
        args.chunk_size,
        args.chunks_per_shard,
        args.order,
        args.voxel_size,
        args.flip_axes,
        not args.no_compress,
        executor_args=executor_args,
    )

    mag_view.layer.downsample(
        from_mag=mag_view.mag,
        coarsest_mag=None if args.max_mag is None else Mag(args.max_mag),
        interpolation_mode=args.interpolation_mode,
        compress=not args.no_compress,
        sampling_mode=args.sampling_mode,
        args=executor_args,
    )
def main(args: argparse.Namespace) -> None:
    source_path = args.source_path

    if not source_path.is_dir():
        logger.error("source_path is not a directory")
        return

    executor_args = get_executor_args(args)

    mag_view = convert_zarr(
        source_path,
        args.target_path,
        layer_name=args.layer_name,
        data_format=args.data_format,
        chunk_size=args.chunk_size,
        chunks_per_shard=args.chunks_per_shard,
        is_segmentation_layer=args.is_segmentation_layer,
        voxel_size=args.voxel_size,
        flip_axes=args.flip_axes,
        compress=not args.no_compress,
        executor_args=executor_args,
    )

    mag_view.layer.downsample(
        from_mag=mag_view.mag,
        coarsest_mag=None if args.max_mag is None else Mag(args.max_mag),
        interpolation_mode=args.interpolation_mode,
        compress=not args.no_compress,
        sampling_mode=args.sampling_mode,
        args=executor_args,
    )
def detect_num_channels(dataset_path: Path, layer: str, mag: Mag = Mag(1)) -> int:
    layer_path = detect_mag_path(dataset_path, layer, mag)
    if layer_path is not None:
        with wkw.Dataset.open(str(layer_path)) as dataset:
            return dataset.header.num_channels
    raise RuntimeError(
        f"Failed to detect numChannels (for {dataset_path}, {layer}, {mag}) because the layer_path is None"
    )
def detect_cubeLength(dataset_path: Path, layer: str, mag: Mag = Mag(1)) -> int:
    layer_path = detect_mag_path(dataset_path, layer, mag)
    if layer_path is not None:
        with wkw.Dataset.open(str(layer_path)) as dataset:
            return dataset.header.block_len * dataset.header.file_len
    raise RuntimeError(
        f"Failed to detect the cube length (for {dataset_path}, {layer}, {mag}) because the layer_path is None"
    )
def test_downsample_2d(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_compressed", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1, chunk_size=8, chunks_per_shard=8)
    # write 2D data with all values set to "123"
    mag.write(data=(np.ones((100, 100, 1)) * 123).astype(np.uint8))
    with pytest.warns(Warning):
        # This call produces a warning because only the mode "CONSTANT_Z" makes sense for 2D data.
        layer.downsample(
            from_mag=Mag(1),
            coarsest_mag=Mag(2),
            sampling_mode=SamplingModes.ISOTROPIC,  # this mode is intentionally not "CONSTANT_Z" for this test
        )
    assert Mag("2-2-1") in layer.mags
    # The data is not darkened
    assert np.all(layer.get_mag(Mag("2-2-1")).read() == 123)
def test_upsampling(
    sample_wkw_path: Path, tmp_path: Path, tiff_mag_2_reference_path: Path
) -> None:
    copytree(sample_wkw_path, tmp_path)

    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("1")
    color_layer.bounding_box = color_layer.bounding_box.align_with_mag(
        Mag("2"), ceil=True
    )
    check_call(
        "python",
        "-m",
        "wkcuber.upsampling",
        "--jobs",
        2,
        "--from_mag",
        "2-2-2",
        "--target_mag",
        1,
        "--buffer_cube_size",
        1024,
        "--layer_name",
        "color",
        tmp_path,
    )
    color_layer = Dataset.open(tmp_path).get_layer("color")
    color_layer.delete_mag("2")
    check_call(
        "python",
        "-m",
        "wkcuber.downsampling",
        "--jobs",
        2,
        "--from_mag",
        1,
        "--max",
        2,
        "--sampling_mode",
        "isotropic",
        "--buffer_cube_size",
        256,
        "--layer_name",
        "color",
        "--interpolation_mode",
        "nearest",
        tmp_path,
    )
    assert (
        Dataset.open(tmp_path).get_layer("color").get_mag("2").bounding_box
        == Dataset.open(tiff_mag_2_reference_path)
        .get_layer("color")
        .get_mag("2")
        .bounding_box
    )
    assert (
        Dataset.open(tmp_path)
        .get_layer("color")
        .get_mag("2")
        .content_is_equal(
            Dataset.open(tiff_mag_2_reference_path).get_layer("color").get_mag("2")
        )
    )
def test_align_with_mag_floored() -> None:
    assert BoundingBox((1, 1, 1), (10, 10, 10)).align_with_mag(Mag(2)) == BoundingBox(
        topleft=(2, 2, 2), size=(8, 8, 8)
    )
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(Mag(2)) == BoundingBox(
        topleft=(2, 2, 2), size=(8, 8, 8)
    )
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(Mag(4)) == BoundingBox(
        topleft=(4, 4, 4), size=(4, 4, 4)
    )
    assert BoundingBox((1, 2, 3), (9, 9, 9)).align_with_mag(Mag(2)) == BoundingBox(
        topleft=(2, 2, 4), size=(8, 8, 8)
    )
def test_align_with_mag_ceiled() -> None:
    assert BoundingBox((1, 1, 1), (10, 10, 10)).align_with_mag(
        Mag(2), ceil=True
    ) == BoundingBox(topleft=(0, 0, 0), size=(12, 12, 12))
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(
        Mag(2), ceil=True
    ) == BoundingBox(topleft=(0, 0, 0), size=(10, 10, 10))
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(
        Mag(4), ceil=True
    ) == BoundingBox(topleft=(0, 0, 0), size=(12, 12, 12))
    assert BoundingBox((1, 2, 3), (9, 9, 9)).align_with_mag(
        Mag(2), ceil=True
    ) == BoundingBox(topleft=(0, 2, 2), size=(10, 10, 10))
def export_wkw_as_nifti(args: Namespace) -> None:
    setup_logging(args)

    export_nifti(
        source_path=args.source_path,
        source_bbox=args.source_bbox,
        mag=Mag(args.mag),
        destination_path=args.destination_path,
        name=args.name,
        padding=args.padding,
    )
def detect_dtype(dataset_path: Path, layer: str, mag: Mag = Mag(1)) -> str:
    layer_path = detect_mag_path(dataset_path, layer, mag)
    if layer_path is not None:
        with wkw.Dataset.open(str(layer_path)) as dataset:
            voxel_type = dataset.header.voxel_type
            num_channels = dataset.header.num_channels
            if voxel_type == np.uint8 and num_channels > 1:
                return "uint" + str(8 * num_channels)
            else:
                return convert_dtype_to_element_class(voxel_type)
    raise RuntimeError(
        f"Failed to detect dtype (for {dataset_path}, {layer}, {mag}) because the layer_path is None"
    )
def sample_wkw_path() -> Path:
    ds_path = TESTDATA_DIR / "tiff_wkw"
    if ds_path.exists():
        rmtree(ds_path)
    check_call(
        [
            "python",
            "-m",
            "wkcuber.cubing",
            "--jobs",
            "2",
            "--voxel_size",
            "1,1,1",
            str(TESTDATA_DIR / "tiff"),
            str(ds_path),
        ]
    )
    copytree(
        TESTDATA_DIR / "tiff" / "datasource-properties.wkw-fixture.json",
        ds_path / PROPERTIES_FILE_NAME,
    )
    Dataset.open(ds_path).get_layer("color").downsample_mag(Mag(1), Mag(2))
    return ds_path
def detect_present_mags(target_path: Path) -> Dict[Path, List[Mag]]:
    layer_path_to_mags: Dict[Path, List[Mag]] = dict()
    layer_paths = [p for p in target_path.iterdir() if p.is_dir()]
    for layer_p in layer_paths:
        layer_path_to_mags.setdefault(layer_p, list())
        mag_paths = [p for p in layer_p.iterdir() if p.is_dir()]
        for mag_p in mag_paths:
            try:
                mag = Mag(mag_p.name)
            except (AssertionError, ValueError):
                continue
            layer_path_to_mags[layer_p].append(mag)

    return layer_path_to_mags
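# Hedged usage sketch for detect_present_mags; the dataset layout below is a
# hypothetical example:
#
#   my_dataset/
#     color/1   color/2-2-1
#     segmentation/1
#
#   detect_present_mags(Path("my_dataset"))
#   # -> {Path("my_dataset/color"): [Mag(1), Mag("2-2-1")],
#   #     Path("my_dataset/segmentation"): [Mag(1)]}
#
# Non-mag directories (e.g. a stray "tmp" folder) are skipped because Mag()
# raises for names it cannot parse.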
def test_downsample_compressed(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_compressed", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1, chunk_size=8, chunks_per_shard=8)
    mag.write(data=(np.random.rand(80, 240, 15) * 255).astype(np.uint8))

    assert not mag._is_compressed()
    mag.compress()
    assert mag._is_compressed()

    layer.downsample(
        from_mag=Mag(1),
        # Setting coarsest_mag to "4" covers an edge case because the z-dimension (15) has to be rounded
        coarsest_mag=Mag(4),
    )

    # Note: this test does not check if the data is correct. This is already covered by other test cases.
    assert len(layer.mags) == 3
    assert Mag("1") in layer.mags.keys()
    assert Mag("2-2-1") in layer.mags.keys()
    assert Mag("4-4-2") in layer.mags.keys()
def test_default_max_mag() -> None:
    assert calculate_default_coarsest_mag(dataset_size=(65536, 65536, 65536)) == Mag(1024)
    assert calculate_default_coarsest_mag(dataset_size=(4096, 4096, 4096)) == Mag(64)
    assert calculate_default_coarsest_mag(dataset_size=(131072, 262144, 262144)) == Mag(4096)
    assert calculate_default_coarsest_mag(dataset_size=(32768, 32768, 32768)) == Mag(512)
    assert calculate_default_coarsest_mag(dataset_size=(16384, 65536, 65536)) == Mag(1024)
    assert calculate_default_coarsest_mag(dataset_size=(16384, 65536, 16384)) == Mag(1024)
    assert calculate_default_coarsest_mag(dataset_size=(256, 256, 256)) == Mag([4, 4, 4])
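# Hedged reading of the expected values above: in every case the chosen mag
# divides the largest dataset extent down to exactly 64 voxels
# (65536 / 1024 = 64, 262144 / 4096 = 64, 256 / 4 = 64), which suggests the
# default coarsest mag targets an edge length of roughly 64 at the coarsest
# resolution.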
def downsample_mags(
    path: Path,
    layer_name: Optional[str] = None,
    from_mag: Optional[Mag] = None,
    max_mag: Optional[Mag] = None,
    interpolation_mode: str = "default",
    buffer_shape: Optional[Vec3Int] = None,
    compress: bool = True,
    args: Optional[Namespace] = None,
    sampling_mode: Union[str, SamplingModes] = SamplingModes.ANISOTROPIC,
    force_sampling_scheme: bool = False,
) -> None:
    """
    Argument `path` expects the directory containing the dataset.
    Argument `layer_name` expects the name of the layer (color or segmentation).
    Argument `from_mag` expects the resolution to base downsampling on.

    For the other parameters see the CLI help or `Layer.downsample` and `Layer.downsample_mag`.
    """
    assert (layer_name and from_mag) or (not layer_name and not from_mag), (
        "You provided only one of the parameters layer_name and from_mag, "
        "but either both or neither must be set. If you do not provide them, "
        "the path argument must point to the mag and layer to downsample "
        "(e.g. dataset/color/1)."
    )
    if not layer_name or not from_mag:
        layer_name = path.parent.name
        from_mag = Mag(path.name)
        path = path.parent.parent

    assert layer_name is not None  # for mypy
    assert from_mag is not None  # for mypy

    Dataset.open(path).get_layer(layer_name).downsample(
        from_mag=from_mag,
        coarsest_mag=max_mag,
        interpolation_mode=interpolation_mode,
        compress=compress,
        sampling_mode=sampling_mode,
        buffer_shape=buffer_shape,
        force_sampling_scheme=force_sampling_scheme,
        args=args,
    )
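# Hedged usage sketch for downsample_mags, mirroring upsample_mags above;
# "my_dataset" is a hypothetical placeholder:
#
#   # Shorthand: infer layer and mag from the path ...
#   downsample_mags(Path("my_dataset/color/1"))
#
#   # ... or pass them explicitly and cap the pyramid at Mag(8):
#   downsample_mags(
#       Path("my_dataset"), layer_name="color", from_mag=Mag(1), max_mag=Mag(8)
#   )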
def test_export_nifti_file(tmp_path: Path) -> None:
    destination_path = tmp_path / f"{DS_NAME}_nifti"
    destination_path.mkdir()

    bbox = BoundingBox((100, 100, 10), (100, 500, 50))
    bbox_dict = bbox.to_config_dict()
    args_list = [
        "--source_path",
        str(SOURCE_PATH),
        "--destination_path",
        str(destination_path),
        "--name",
        "test_export",
        "--source_bbox",
        bbox.to_csv(),
        "--mag",
        "1",
    ]

    export_wkw_as_nifti_from_arg_list(args_list)

    wk_ds = Dataset.open(SOURCE_PATH)
    for layer_name, layer in wk_ds.layers.items():
        correct_image = layer.get_mag(Mag(1)).read(
            bbox_dict["topleft"], bbox_dict["size"]
        )
        # nifti is transposed
        correct_image = correct_image.transpose(1, 2, 3, 0)
        correct_image = np.squeeze(correct_image)

        nifti_path = destination_path.joinpath(f"test_export_{layer_name}.nii")

        assert nifti_path.is_file(), f"Expected a nifti to be written at: {nifti_path}."

        nifti = nib.load(str(nifti_path))
        test_image = np.array(nifti.get_fdata())

        assert np.array_equal(correct_image, test_image), (
            f"The nifti file {nifti_path} that was written is not "
            f"equal to the original wkw_file."
        )
def test_downsample_mag_list_with_only_setup_mags(tmp_path: Path) -> None:
    ds = Dataset(tmp_path / "downsample_mag_list", voxel_size=(1, 1, 2))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag = layer.add_mag(1)
    mag.write(data=(np.random.rand(10, 20, 30) * 255).astype(np.uint8))

    target_mags = [Mag([4, 4, 8]), Mag(2), Mag([32, 32, 8]), Mag(32)]  # unsorted list

    layer.downsample_mag_list(
        from_mag=Mag(1), target_mags=target_mags, only_setup_mags=True
    )

    for m in target_mags:
        assert np.all(layer.get_mag(m).read() == 0), "The mags should be empty."

    layer.downsample_mag_list(
        from_mag=Mag(1), target_mags=target_mags, allow_overwrite=True
    )

    for m in target_mags:
        assert m in layer.mags
def tile_cubing(
    target_path: Path,
    layer_name: str,
    batch_size: int,
    input_path_pattern: str,
    voxel_size: Tuple[int, int, int],
    args: Optional[Namespace] = None,
) -> None:
    decimal_lengths = get_digit_counts_for_dimensions(input_path_pattern)
    (
        min_dimensions,
        max_dimensions,
        arbitrary_file,
        file_count,
    ) = detect_interval_for_dimensions(input_path_pattern, decimal_lengths)

    if not arbitrary_file:
        logging.error(
            f"No source files found. Maybe the input_path_pattern was wrong. You provided: {input_path_pattern}"
        )
        return

    # Determine tile size from first matching file
    tile_width, tile_height = image_reader.read_dimensions(arbitrary_file)
    num_z = max_dimensions["z"] - min_dimensions["z"] + 1
    num_x = (max_dimensions["x"] - min_dimensions["x"] + 1) * tile_width
    num_y = (max_dimensions["y"] - min_dimensions["y"] + 1) * tile_height
    x_offset = min_dimensions["x"] * tile_width
    y_offset = min_dimensions["y"] * tile_height
    num_channels = image_reader.read_channel_count(arbitrary_file)
    logging.info(
        "Found source files: count={} with tile_size={}x{}".format(
            file_count, tile_width, tile_height
        )
    )

    if args is None or not hasattr(args, "dtype") or args.dtype is None:
        dtype = image_reader.read_dtype(arbitrary_file)
    else:
        dtype = args.dtype

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"
    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
        )

    bbox = BoundingBox(
        Vec3Int(x_offset, y_offset, min_dimensions["z"]),
        Vec3Int(num_x, num_y, num_z),
    )
    if target_layer.bounding_box.volume() == 0:
        # If the layer is empty, we want to set the bbox directly because extending it
        # would mean that the bbox would always start at (0, 0, 0)
        target_layer.bounding_box = bbox
    else:
        target_layer.bounding_box = target_layer.bounding_box.extended_by(bbox)

    target_mag_view = target_layer.get_or_add_mag(
        Mag(1), block_len=DEFAULT_CHUNK_SIZE.z
    )

    with get_executor_for_args(args) as executor:
        job_args = []
        # Iterate over all z batches
        for z_batch in get_regular_chunks(
            min_dimensions["z"], max_dimensions["z"], DEFAULT_CHUNK_SIZE.z
        ):
            # The z_batch always starts and ends at a multiple of DEFAULT_CHUNK_SIZE.z.
            # However, we only want the part that is inside the bounding box
            z_batch = range(
                max(list(z_batch)[0], target_layer.bounding_box.topleft.z),
                min(list(z_batch)[-1] + 1, target_layer.bounding_box.bottomright.z),
            )
            z_values = list(z_batch)
            job_args.append(
                (
                    target_mag_view.get_view(
                        (x_offset, y_offset, z_values[0]),
                        (num_x, num_y, len(z_values)),
                    ),
                    z_values,
                    input_path_pattern,
                    batch_size,
                    (tile_width, tile_height, num_channels),
                    min_dimensions,
                    max_dimensions,
                    decimal_lengths,
                    dtype,
                    num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(tile_cubing_job, job_args),
            f"Tile cubing layer {layer_name}",
        )
        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(
                SegmentationLayer, target_layer
            ).largest_segment_id = largest_segment_id
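# Hedged usage sketch for tile_cubing; the target path and the placeholder
# syntax in input_path_pattern below are hypothetical examples (the exact
# pattern format is defined by get_digit_counts_for_dimensions and
# detect_interval_for_dimensions):
#
#   tile_cubing(
#       target_path=Path("my_dataset"),
#       layer_name="color",
#       batch_size=32,
#       input_path_pattern="tiles/{zzzzz}/{yyyyy}/{xxxxx}.jpg",
#       voxel_size=(11, 11, 25),
#   )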
def detect_resolutions(dataset_path: Path, layer: str) -> Generator[Mag, None, None]:
    for mag in (dataset_path / layer).iterdir():
        try:
            yield Mag(mag.name)
        except ValueError:
            logging.info("ignoring {} as resolution".format(mag))