def test_in_mag() -> None:
    with pytest.raises(AssertionError):
        BoundingBox((1, 2, 3), (9, 9, 9)).in_mag(Mag(2))

    with pytest.raises(AssertionError):
        BoundingBox((2, 2, 2), (9, 9, 9)).in_mag(Mag(2))

    assert BoundingBox((2, 2, 2), (10, 10, 10)).in_mag(Mag(2)) == BoundingBox(
        topleft=(1, 1, 1), size=(5, 5, 5)
    )
def test_align_with_mag_against_numpy_implementation(
    bb: BoundingBox,
    mag: Mag,
    ceil: bool,
) -> None:
    try:
        slow_np_result = bb._align_with_mag_slow(mag, ceil)
    # Very large numbers don't fit into the C-int anymore:
    except OverflowError:
        bb.align_with_mag(mag, ceil)
    else:
        # The slower numpy implementation is wrong for very large numbers:
        if all(i < 12e15 for i in bb.bottomright):
            assert bb.align_with_mag(mag, ceil) == slow_np_result
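# The parameters of the property test above are not generated in its body; in the
# repository they are presumably supplied by a property-testing decorator. Below is a
# hedged sketch of how such a driver could look with hypothesis; the strategy bounds
# and Mag values are made up for illustration and are not taken from the source.
from hypothesis import given, strategies as st


@given(
    bb=st.builds(
        BoundingBox,
        st.tuples(st.integers(0, 2**60), st.integers(0, 2**60), st.integers(0, 2**60)),
        st.tuples(st.integers(1, 2**60), st.integers(1, 2**60), st.integers(1, 2**60)),
    ),
    mag=st.sampled_from([Mag(1), Mag(2), Mag(4), Mag(16)]),
    ceil=st.booleans(),
)
def test_align_with_mag_property_sketch(bb: BoundingBox, mag: Mag, ceil: bool) -> None:
    # Hypothetical wrapper that delegates to the comparison test above.
    test_align_with_mag_against_numpy_implementation(bb, mag, ceil)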
def convert_zarr(
    source_zarr_path: Path,
    target_path: Path,
    layer_name: str,
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    is_segmentation_layer: bool = False,
    voxel_size: Optional[Tuple[float, float, float]] = (1.0, 1.0, 1.0),
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
    compress: bool = True,
    executor_args: Optional[argparse.Namespace] = None,
) -> MagView:
    ref_time = time.time()

    f = zarr.open(store=_fsstore_from_path(source_zarr_path), mode="r")
    input_dtype = f.dtype
    shape = f.shape

    if voxel_size is None:
        voxel_size = 1.0, 1.0, 1.0
    wk_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    wk_layer = wk_ds.get_or_add_layer(
        layer_name,
        "segmentation" if is_segmentation_layer else "color",
        dtype_per_layer=np.dtype(input_dtype),
        num_channels=1,
        largest_segment_id=0,
        data_format=data_format,
    )
    wk_layer.bounding_box = BoundingBox((0, 0, 0), shape)
    wk_mag = wk_layer.get_or_add_mag(
        "1",
        chunk_size=chunk_size,
        chunks_per_shard=chunks_per_shard,
        compress=compress,
    )

    # Parallel chunk conversion
    with get_executor_for_args(executor_args) as executor:
        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(
                partial(
                    _zarr_chunk_converter,
                    source_zarr_path=source_zarr_path,
                    target_mag_view=wk_mag,
                    flip_axes=flip_axes,
                ),
                wk_layer.bounding_box.chunk(chunk_size=chunk_size * chunks_per_shard),
            )
        )

    if is_segmentation_layer:
        largest_segment_id = int(max(largest_segment_id_per_chunk))
        cast(SegmentationLayer, wk_layer).largest_segment_id = largest_segment_id

    logger.debug(
        "Conversion of {} took {:.8f}s".format(source_zarr_path, time.time() - ref_time)
    )
    return wk_mag
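# Hedged usage sketch for convert_zarr (not part of the original module). All paths,
# the voxel size, and the DataFormat member are illustrative assumptions; chunk_size
# and chunks_per_shard follow the Vec3Int signature above.
def _example_convert_zarr() -> None:
    convert_zarr(
        source_zarr_path=Path("input/volume.zarr"),  # hypothetical source array
        target_path=Path("output/my_dataset"),       # hypothetical target dataset
        layer_name="color",
        data_format=DataFormat.WKW,                  # assumed enum member
        chunk_size=Vec3Int(32, 32, 32),
        chunks_per_shard=Vec3Int(32, 32, 32),
        is_segmentation_layer=False,
        voxel_size=(11.24, 11.24, 25.0),             # example voxel size
    )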
def test_export_nifti_file(tmp_path: Path) -> None:
    destination_path = tmp_path / f"{DS_NAME}_nifti"
    destination_path.mkdir()

    bbox = BoundingBox((100, 100, 10), (100, 500, 50))
    bbox_dict = bbox.to_config_dict()
    args_list = [
        "--source_path",
        str(SOURCE_PATH),
        "--destination_path",
        str(destination_path),
        "--name",
        "test_export",
        "--source_bbox",
        bbox.to_csv(),
        "--mag",
        "1",
    ]

    export_wkw_as_nifti_from_arg_list(args_list)

    wk_ds = Dataset.open(SOURCE_PATH)
    for layer_name, layer in wk_ds.layers.items():
        correct_image = layer.get_mag(Mag(1)).read(bbox_dict["topleft"], bbox_dict["size"])
        # nifti is transposed
        correct_image = correct_image.transpose(1, 2, 3, 0)
        correct_image = np.squeeze(correct_image)

        nifti_path = destination_path.joinpath(f"test_export_{layer_name}.nii")

        assert nifti_path.is_file(), f"Expected a nifti to be written at: {nifti_path}."

        nifti = nib.load(str(nifti_path))
        test_image = np.array(nifti.get_fdata())

        assert np.array_equal(correct_image, test_image), (
            f"The nifti file {nifti_path} that was written is not "
            f"equal to the original wkw_file."
        )
def convert_raw(
    source_raw_path: Path,
    target_path: Path,
    layer_name: str,
    input_dtype: str,
    shape: Tuple[int, int, int],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    order: str = "F",
    voxel_size: Optional[Tuple[float, float, float]] = (1.0, 1.0, 1.0),
    flip_axes: Optional[Union[int, Tuple[int, ...]]] = None,
    compress: bool = True,
    executor_args: Optional[argparse.Namespace] = None,
) -> MagView:
    assert order in ("C", "F")
    time_start(f"Conversion of {source_raw_path}")

    if voxel_size is None:
        voxel_size = 1.0, 1.0, 1.0
    wk_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    wk_layer = wk_ds.get_or_add_layer(
        layer_name,
        "color",
        dtype_per_layer=np.dtype(input_dtype),
        num_channels=1,
        data_format=data_format,
    )
    wk_layer.bounding_box = BoundingBox((0, 0, 0), shape)
    wk_mag = wk_layer.get_or_add_mag(
        "1",
        chunk_size=chunk_size,
        chunks_per_shard=chunks_per_shard,
        compress=compress,
    )

    # Parallel chunk conversion
    with get_executor_for_args(executor_args) as executor:
        wait_and_ensure_success(
            executor.map_to_futures(
                partial(
                    _raw_chunk_converter,
                    source_raw_path=source_raw_path,
                    target_mag_view=wk_mag,
                    input_dtype=input_dtype,
                    shape=shape,
                    order=order,
                    flip_axes=flip_axes,
                ),
                wk_layer.bounding_box.chunk(chunk_size=chunk_size * chunks_per_shard),
            )
        )

    time_stop(f"Conversion of {source_raw_path}")
    return wk_mag
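# Hedged usage sketch for convert_raw (not part of the original module). The paths,
# dtype, shape, and DataFormat member are illustrative assumptions; shape must match
# the x, y, z extent of the raw file.
def _example_convert_raw() -> None:
    convert_raw(
        source_raw_path=Path("input/volume.raw"),    # hypothetical raw file
        target_path=Path("output/my_dataset"),       # hypothetical target dataset
        layer_name="color",
        input_dtype="uint8",
        shape=(512, 512, 120),                       # x, y, z extent of the raw volume
        data_format=DataFormat.WKW,                  # assumed enum member
        chunk_size=Vec3Int(32, 32, 32),
        chunks_per_shard=Vec3Int(32, 32, 32),
        order="F",                                   # Fortran (column-major) layout
    )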
def test_negative_size() -> None:
    assert BoundingBox((10, 10, 10), (-5, 5, 5)) == BoundingBox((5, 10, 10), (5, 5, 5))
    assert BoundingBox((10, 10, 10), (-5, 5, -5)) == BoundingBox((5, 10, 5), (5, 5, 5))
    assert BoundingBox((10, 10, 10), (-5, 5, -50)) == BoundingBox((5, 10, -40), (5, 5, 50))
def convert_knossos(
    source_path: Path,
    target_path: Path,
    layer_name: str,
    dtype: str,
    voxel_size: Tuple[float, float, float],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    mag: int = 1,
    args: Optional[Namespace] = None,
) -> None:
    source_knossos_info = KnossosDatasetInfo(source_path, dtype)

    target_dataset = Dataset(target_path, voxel_size, exist_ok=True)
    target_layer = target_dataset.get_or_add_layer(
        layer_name,
        COLOR_CATEGORY,
        data_format=data_format,
        dtype_per_channel=dtype,
    )

    with open_knossos(source_knossos_info) as source_knossos:
        knossos_cubes = np.array(list(source_knossos.list_cubes()))
        if len(knossos_cubes) == 0:
            logging.error(
                "No input KNOSSOS cubes found. Make sure to pass the path which points to a KNOSSOS magnification (e.g., testdata/knossos/color/1)."
            )
            exit(1)

        min_xyz = knossos_cubes.min(axis=0) * CUBE_EDGE_LEN
        max_xyz = (knossos_cubes.max(axis=0) + 1) * CUBE_EDGE_LEN
        target_layer.bounding_box = BoundingBox(
            Vec3Int(min_xyz), Vec3Int(max_xyz - min_xyz)
        )

    target_mag = target_layer.get_or_add_mag(
        mag, chunk_size=chunk_size, chunks_per_shard=chunks_per_shard
    )

    with get_executor_for_args(args) as executor:
        target_mag.for_each_chunk(
            partial(convert_cube_job, source_knossos_info),
            chunk_size=chunk_size * chunks_per_shard,
            executor=executor,
            progress_desc=f"Converting knossos layer {layer_name}",
        )
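# Hedged usage sketch for convert_knossos (not part of the original module). The source
# path follows the hint from the error message above (a single KNOSSOS magnification
# directory); the remaining values are illustrative assumptions.
def _example_convert_knossos() -> None:
    convert_knossos(
        source_path=Path("testdata/knossos/color/1"),  # one KNOSSOS magnification
        target_path=Path("output/my_dataset"),         # hypothetical target dataset
        layer_name="color",
        dtype="uint8",
        voxel_size=(11.24, 11.24, 25.0),
        data_format=DataFormat.WKW,                    # assumed enum member
        chunk_size=Vec3Int(32, 32, 32),
        chunks_per_shard=Vec3Int(32, 32, 32),
        mag=1,
    )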
def test_align_with_mag_floored() -> None:
    assert BoundingBox((1, 1, 1), (10, 10, 10)).align_with_mag(Mag(2)) == BoundingBox(
        topleft=(2, 2, 2), size=(8, 8, 8)
    )
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(Mag(2)) == BoundingBox(
        topleft=(2, 2, 2), size=(8, 8, 8)
    )
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(Mag(4)) == BoundingBox(
        topleft=(4, 4, 4), size=(4, 4, 4)
    )
    assert BoundingBox((1, 2, 3), (9, 9, 9)).align_with_mag(Mag(2)) == BoundingBox(
        topleft=(2, 2, 4), size=(8, 8, 8)
    )
def test_with_bounds() -> None:
    assert BoundingBox((1, 2, 3), (5, 5, 5)).with_bounds_x(0, 10) == BoundingBox(
        (0, 2, 3), (10, 5, 5)
    )
    assert BoundingBox((1, 2, 3), (5, 5, 5)).with_bounds_y(
        new_topleft_y=0
    ) == BoundingBox((1, 0, 3), (5, 5, 5))
    assert BoundingBox((1, 2, 3), (5, 5, 5)).with_bounds_z(new_size_z=10) == BoundingBox(
        (1, 2, 3), (5, 5, 10)
    )
def test_align_with_mag_ceiled() -> None:
    assert BoundingBox((1, 1, 1), (10, 10, 10)).align_with_mag(
        Mag(2), ceil=True
    ) == BoundingBox(topleft=(0, 0, 0), size=(12, 12, 12))
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(
        Mag(2), ceil=True
    ) == BoundingBox(topleft=(0, 0, 0), size=(10, 10, 10))
    assert BoundingBox((1, 1, 1), (9, 9, 9)).align_with_mag(
        Mag(4), ceil=True
    ) == BoundingBox(topleft=(0, 0, 0), size=(12, 12, 12))
    assert BoundingBox((1, 2, 3), (9, 9, 9)).align_with_mag(
        Mag(2), ceil=True
    ) == BoundingBox(topleft=(0, 2, 2), size=(10, 10, 10))
def _zarr_chunk_converter(
    bounding_box: BoundingBox,
    source_zarr_path: Path,
    target_mag_view: MagView,
    flip_axes: Optional[Union[int, Tuple[int, ...]]],
) -> int:
    logging.info(f"Conversion of {bounding_box.topleft}")

    slices = bounding_box.to_slices()
    zarr_file = zarr.open(store=_fsstore_from_path(source_zarr_path), mode="r")
    source_data = zarr_file[slices][None, ...]

    if flip_axes:
        source_data = np.flip(source_data, flip_axes)

    contiguous_chunk = source_data.copy(order="F")
    target_mag_view.write(contiguous_chunk, bounding_box.topleft)

    return source_data.max()
def test_contains() -> None:
    assert BoundingBox((1, 1, 1), (5, 5, 5)).contains((2, 2, 2))
    assert BoundingBox((1, 1, 1), (5, 5, 5)).contains((1, 1, 1))

    # top-left is inclusive, bottom-right is exclusive
    assert not BoundingBox((1, 1, 1), (5, 5, 5)).contains((6, 6, 6))
    assert not BoundingBox((1, 1, 1), (5, 5, 5)).contains((20, 20, 20))

    # nd-array may contain float values
    assert BoundingBox((1, 1, 1), (5, 5, 5)).contains(np.array([5.5, 5.5, 5.5]))
    assert not BoundingBox((1, 1, 1), (5, 5, 5)).contains(np.array([6.0, 6.0, 6.0]))
def _raw_chunk_converter(
    bounding_box: BoundingBox,
    source_raw_path: Path,
    target_mag_view: MagView,
    input_dtype: str,
    shape: Tuple[int, int, int],
    order: str,
    flip_axes: Optional[Union[int, Tuple[int, ...]]],
) -> None:
    logging.info(f"Conversion of {bounding_box.topleft}")

    source_data: np.memmap = np.memmap(
        source_raw_path,
        dtype=input_dtype,
        mode="r",
        shape=(1,) + shape,
        order=order,
    )

    if flip_axes:
        source_data = np.flip(source_data, flip_axes)

    contiguous_chunk = source_data[(slice(None),) + bounding_box.to_slices()].copy(
        order="F"
    )
    target_mag_view.write(contiguous_chunk, bounding_box.topleft)
def cubing(
    source_path: Path,
    target_path: Path,
    layer_name: str,
    batch_size: Optional[int],
    channel_index: Optional[int],
    sample_index: Optional[int],
    dtype: Optional[str],
    target_mag_str: str,
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    interpolation_mode_str: str,
    start_z: int,
    skip_first_z_slices: int,
    pad: bool,
    voxel_size: Tuple[float, float, float],
    executor_args: Namespace,
) -> Layer:
    source_files = find_source_filenames(source_path)

    all_num_x, all_num_y = zip(
        *[
            image_reader.read_dimensions(source_files[i])
            for i in range(len(source_files))
        ]
    )
    num_x = max(all_num_x)
    num_y = max(all_num_y)
    # All images are assumed to have equal channels and samples
    num_channels = image_reader.read_channel_count(source_files[0])
    num_samples = image_reader.read_sample_count(source_files[0])
    num_output_channels = num_channels * num_samples
    if channel_index is not None:
        # if there is no c axis, but someone meant to only use one channel/sample, set the sample index instead
        if sample_index is None and num_channels == 1 and channel_index > 0:
            sample_index = channel_index
            channel_index = 0

        assert (
            0 <= channel_index < num_channels
        ), "Selected channel is invalid. Please check the number of channels in the source file."
        num_output_channels = num_samples
    if sample_index is not None:
        # if no channel axis exists, it is valid to only set the sample index. Set channel index to 0 to avoid confusion
        if channel_index is None and num_channels == 1:
            channel_index = 0
        assert (
            channel_index is not None
        ), "Sample index is only valid if a channel index is also set."
        assert (
            0 <= sample_index < num_samples
        ), "Selected sample is invalid. Please check the number of samples in the source file."
        num_output_channels = 1

    num_z_slices_per_file = image_reader.read_z_slices_per_file(source_files[0])
    assert (
        num_z_slices_per_file == 1 or len(source_files) == 1
    ), "Multi page TIFF support only for single files"
    if num_z_slices_per_file > 1:
        num_z = num_z_slices_per_file
    else:
        num_z = len(source_files)

    if dtype is None:
        dtype = image_reader.read_dtype(source_files[0])

    if batch_size is None:
        batch_size = DEFAULT_CHUNK_SIZE.z

    target_mag = Mag(target_mag_str)

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"

    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_output_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_output_channels,
            data_format=data_format,
        )

    target_layer.bounding_box = target_layer.bounding_box.extended_by(
        BoundingBox(
            Vec3Int(0, 0, start_z + skip_first_z_slices) * target_mag,
            Vec3Int(num_x, num_y, num_z - skip_first_z_slices) * target_mag,
        )
    )

    target_mag_view = target_layer.get_or_add_mag(
        target_mag,
        chunks_per_shard=chunks_per_shard,
        chunk_size=chunk_size,
    )

    interpolation_mode = parse_interpolation_mode(
        interpolation_mode_str, target_layer.category
    )
    if target_mag != Mag(1):
        logging.info(
            f"Downsampling the cubed image to {target_mag} in memory with interpolation mode {interpolation_mode}."
        )

    logging.info("Found source files: count={} size={}x{}".format(num_z, num_x, num_y))

    with get_executor_for_args(executor_args) as executor:
        job_args = []
        # We iterate over all z sections
        for z in range(skip_first_z_slices, num_z, DEFAULT_CHUNK_SIZE.z):
            # The z is used to access the source files. However, when writing the data,
            # the `start_z` has to be considered.
            max_z = min(num_z, z + DEFAULT_CHUNK_SIZE.z)
            # Prepare source files array
            if len(source_files) > 1:
                source_files_array = source_files[z:max_z]
            else:
                source_files_array = source_files * (max_z - z)

            # Prepare job
            job_args.append(
                (
                    target_mag_view.get_view(
                        (0, 0, z + start_z),
                        (num_x, num_y, max_z - z),
                    ),
                    target_mag,
                    interpolation_mode,
                    source_files_array,
                    batch_size,
                    pad,
                    channel_index,
                    sample_index,
                    dtype,
                    target_layer.num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(cubing_job, job_args),
            progress_desc=f"Cubing from {skip_first_z_slices} to {num_z}",
        )
        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(
                SegmentationLayer, target_layer
            ).largest_segment_id = largest_segment_id

    # Return layer
    return target_layer
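# Hedged usage sketch for cubing (not part of the original module). Every argument
# value here is an illustrative assumption; in particular the interpolation mode
# string and the executor namespace are guesses, not values from the repository.
def _example_cubing() -> None:
    cubing(
        source_path=Path("input/image_stack"),  # hypothetical directory of 2D slices
        target_path=Path("output/my_dataset"),
        layer_name="color",
        batch_size=None,                        # falls back to DEFAULT_CHUNK_SIZE.z
        channel_index=None,
        sample_index=None,
        dtype=None,                             # inferred from the first source file
        target_mag_str="1",
        data_format=DataFormat.WKW,             # assumed enum member
        chunk_size=Vec3Int(32, 32, 32),
        chunks_per_shard=Vec3Int(32, 32, 32),
        interpolation_mode_str="default",       # assumed mode name
        start_z=0,
        skip_first_z_slices=0,
        pad=False,
        voxel_size=(11.24, 11.24, 25.0),
        executor_args=Namespace(jobs=4),        # hypothetical executor configuration
    )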
def test_negative_inversion(
    bbox: BoundingBox,
) -> None:
    """Flipping the topleft and bottomright (by padding both with the negative size)
    results in the original bbox, as negative sizes are converted to positive ones."""
    assert bbox == bbox.padded_with_margins(-bbox.size, -bbox.size)
def tile_cubing(
    target_path: Path,
    layer_name: str,
    batch_size: int,
    input_path_pattern: str,
    voxel_size: Tuple[int, int, int],
    args: Optional[Namespace] = None,
) -> None:
    decimal_lengths = get_digit_counts_for_dimensions(input_path_pattern)
    (
        min_dimensions,
        max_dimensions,
        arbitrary_file,
        file_count,
    ) = detect_interval_for_dimensions(input_path_pattern, decimal_lengths)

    if not arbitrary_file:
        logging.error(
            f"No source files found. Maybe the input_path_pattern was wrong. You provided: {input_path_pattern}"
        )
        return
    # Determine tile size from first matching file
    tile_width, tile_height = image_reader.read_dimensions(arbitrary_file)
    num_z = max_dimensions["z"] - min_dimensions["z"] + 1
    num_x = (max_dimensions["x"] - min_dimensions["x"] + 1) * tile_width
    num_y = (max_dimensions["y"] - min_dimensions["y"] + 1) * tile_height
    x_offset = min_dimensions["x"] * tile_width
    y_offset = min_dimensions["y"] * tile_height
    num_channels = image_reader.read_channel_count(arbitrary_file)
    logging.info(
        "Found source files: count={} with tile_size={}x{}".format(
            file_count, tile_width, tile_height
        )
    )
    if args is None or not hasattr(args, "dtype") or args.dtype is None:
        dtype = image_reader.read_dtype(arbitrary_file)
    else:
        dtype = args.dtype

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"
    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
        )

    bbox = BoundingBox(
        Vec3Int(x_offset, y_offset, min_dimensions["z"]),
        Vec3Int(num_x, num_y, num_z),
    )
    if target_layer.bounding_box.volume() == 0:
        # If the layer is empty, we want to set the bbox directly because extending it
        # would mean that the bbox would always start at (0, 0, 0)
        target_layer.bounding_box = bbox
    else:
        target_layer.bounding_box = target_layer.bounding_box.extended_by(bbox)

    target_mag_view = target_layer.get_or_add_mag(Mag(1), block_len=DEFAULT_CHUNK_SIZE.z)

    with get_executor_for_args(args) as executor:
        job_args = []
        # Iterate over all z batches
        for z_batch in get_regular_chunks(
            min_dimensions["z"], max_dimensions["z"], DEFAULT_CHUNK_SIZE.z
        ):
            # The z_batch always starts and ends at a multiple of DEFAULT_CHUNK_SIZE.z.
            # However, we only want the part that is inside the bounding box
            z_batch = range(
                max(list(z_batch)[0], target_layer.bounding_box.topleft.z),
                min(list(z_batch)[-1] + 1, target_layer.bounding_box.bottomright.z),
            )
            z_values = list(z_batch)
            job_args.append(
                (
                    target_mag_view.get_view(
                        (x_offset, y_offset, z_values[0]),
                        (num_x, num_y, len(z_values)),
                    ),
                    z_values,
                    input_path_pattern,
                    batch_size,
                    (tile_width, tile_height, num_channels),
                    min_dimensions,
                    max_dimensions,
                    decimal_lengths,
                    dtype,
                    num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(tile_cubing_job, job_args),
            f"Tile cubing layer {layer_name}",
        )

        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(
                SegmentationLayer, target_layer
            ).largest_segment_id = largest_segment_id
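# Hedged usage sketch for tile_cubing (not part of the original module). It assumes
# tiles whose x/y/z coordinates are encoded in the file names; the placeholder syntax
# in the pattern is a guess based on get_digit_counts_for_dimensions and is not
# verified against the repository.
def _example_tile_cubing() -> None:
    tile_cubing(
        target_path=Path("output/my_dataset"),
        layer_name="color",
        batch_size=32,
        # Hypothetical pattern: z/y/x tile coordinates with fixed digit counts.
        input_path_pattern="input/tiles/{zzzzz}/{yyyyy}/{xxxxx}.png",
        voxel_size=(11, 11, 25),
    )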
def parse_bounding_box(bbox_str: str) -> BoundingBox:
    try:
        return BoundingBox.from_csv(bbox_str)
    except Exception as e:
        raise argparse.ArgumentTypeError("The bounding box could not be parsed.") from e
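# parse_bounding_box is intended as an argparse type converter. Below is a small
# wiring sketch (not from the repository); the CSV layout of topleft followed by size
# is assumed from the BoundingBox.to_csv/from_csv round trip used in this section.
def _example_parse_bounding_box() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--source_bbox",
        type=parse_bounding_box,
        help="Comma-separated values: topleft_x,topleft_y,topleft_z,size_x,size_y,size_z",
    )
    args = parser.parse_args(["--source_bbox", "100,100,10,100,500,50"])
    assert args.source_bbox == BoundingBox((100, 100, 10), (100, 500, 50))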