def _parse_vec3_int(value: str) -> Vec3Int:
    parts = [int(part.strip()) for part in value.split(",")]
    if len(parts) == 1:
        return Vec3Int.full(parts[0])
    elif len(parts) == 3:
        return Vec3Int(*parts)
    else:
        raise TypeError(f"Cannot convert `{value}` to Vec3Int.")
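# A minimal usage sketch for _parse_vec3_int (this helper function is an illustrative
# assumption, not part of the original source): a single value is expanded to all three
# axes, a comma-separated triple is taken as-is, and anything else is rejected.
def _example_parse_vec3_int() -> None:
    assert _parse_vec3_int("16") == Vec3Int.full(16)  # shorthand for "16,16,16"
    assert _parse_vec3_int("32, 32, 8") == Vec3Int(32, 32, 8)  # explicit triple
    try:
        _parse_vec3_int("1,2")  # two components cannot form a Vec3Int
    except TypeError:
        pass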
def downsample_test_helper(
    WT1_path: Path, tmp_path: Path, use_compress: bool, chunk_size: Vec3Int
) -> None:
    source_path = WT1_path
    target_path = tmp_path / "WT1_wkw"

    source_ds = Dataset.open(source_path)
    target_ds = source_ds.copy_dataset(
        target_path, chunk_size=chunk_size, chunks_per_shard=16
    )

    target_layer = target_ds.get_layer("color")
    mag1 = target_layer.get_mag("1")
    target_layer.delete_mag("2-2-1")  # This is not needed for this test

    # The bounding box has to be set here explicitly because the downsampled
    # data is written to a different dataset.
    target_layer.bounding_box = source_ds.get_layer("color").bounding_box

    mag2 = target_layer._initialize_mag_from_other_mag("2", mag1, use_compress)

    # The actual size of mag1 is (4600, 4600, 512).
    # To keep this test case fast, we are only downsampling a small part
    offset = (4096, 4096, 0)
    size = (504, 504, 512)
    source_buffer = mag1.read(
        absolute_offset=offset,
        size=size,
    )[0]
    assert np.any(source_buffer != 0)

    downsample_cube_job(
        (
            mag1.get_view(absolute_offset=offset, size=size),
            mag2.get_view(
                absolute_offset=offset,
                size=size,
            ),
            0,
        ),
        Vec3Int(2, 2, 2),
        InterpolationModes.MAX,
        Vec3Int.full(128),
    )

    assert np.any(source_buffer != 0)

    target_buffer = mag2.read(absolute_offset=offset, size=size)[0]
    assert np.any(target_buffer != 0)

    assert np.all(
        target_buffer
        == downsample_cube(source_buffer, [2, 2, 2], InterpolationModes.MAX)
    )
def convert_cube_job(
    source_knossos_info: KnossosDatasetInfo, args: Tuple[View, int]
) -> None:
    target_view, _ = args

    time_start(f"Converting of {target_view.bounding_box}")
    cube_size = cast(Tuple[int, int, int], (CUBE_EDGE_LEN,) * 3)

    offset = target_view.bounding_box.in_mag(target_view.mag).topleft
    size = target_view.bounding_box.in_mag(target_view.mag).size
    buffer = np.zeros(size.to_tuple(), dtype=target_view.get_dtype())
    with open_knossos(source_knossos_info) as source_knossos:
        for x in range(0, size.x, CUBE_EDGE_LEN):
            for y in range(0, size.y, CUBE_EDGE_LEN):
                for z in range(0, size.z, CUBE_EDGE_LEN):
                    cube_data = source_knossos.read(
                        (offset + Vec3Int(x, y, z)).to_tuple(), cube_size
                    )
                    buffer[
                        x : (x + CUBE_EDGE_LEN),
                        y : (y + CUBE_EDGE_LEN),
                        z : (z + CUBE_EDGE_LEN),
                    ] = cube_data
    target_view.write(buffer)

    time_stop(f"Converting of {target_view.bounding_box}")
def convert_knossos(
    source_path: Path,
    target_path: Path,
    layer_name: str,
    dtype: str,
    voxel_size: Tuple[float, float, float],
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    mag: int = 1,
    args: Optional[Namespace] = None,
) -> None:
    source_knossos_info = KnossosDatasetInfo(source_path, dtype)

    target_dataset = Dataset(target_path, voxel_size, exist_ok=True)
    target_layer = target_dataset.get_or_add_layer(
        layer_name,
        COLOR_CATEGORY,
        data_format=data_format,
        dtype_per_channel=dtype,
    )

    with open_knossos(source_knossos_info) as source_knossos:
        knossos_cubes = np.array(list(source_knossos.list_cubes()))
        if len(knossos_cubes) == 0:
            logging.error(
                "No input KNOSSOS cubes found. Make sure to pass the path which points to a KNOSSOS magnification (e.g., testdata/knossos/color/1)."
            )
            exit(1)

        min_xyz = knossos_cubes.min(axis=0) * CUBE_EDGE_LEN
        max_xyz = (knossos_cubes.max(axis=0) + 1) * CUBE_EDGE_LEN
        target_layer.bounding_box = BoundingBox(
            Vec3Int(min_xyz), Vec3Int(max_xyz - min_xyz)
        )

    target_mag = target_layer.get_or_add_mag(
        mag, chunk_size=chunk_size, chunks_per_shard=chunks_per_shard
    )

    with get_executor_for_args(args) as executor:
        target_mag.for_each_chunk(
            partial(convert_cube_job, source_knossos_info),
            chunk_size=chunk_size * chunks_per_shard,
            executor=executor,
            progress_desc=f"Converting knossos layer {layer_name}",
        )
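# Hypothetical invocation sketch for convert_knossos (paths and parameter values are
# illustrative assumptions, not taken from the original source). The source path must
# point at a single KNOSSOS magnification directory, as the error message above notes.
def _example_convert_knossos_call() -> None:
    convert_knossos(
        source_path=Path("testdata/knossos/color/1"),
        target_path=Path("testoutput/knossos_converted"),
        layer_name="color",
        dtype="uint8",
        voxel_size=(11.24, 11.24, 25.0),  # assumed voxel size in nanometers
        data_format=DataFormat.WKW,       # assumed output data format
        chunk_size=Vec3Int.full(32),
        chunks_per_shard=Vec3Int.full(32),
        mag=1,
        args=None,  # falls back to the default executor
    )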
def test_downsample_multi_channel(tmp_path: Path) -> None:
    num_channels = 3
    size = (32, 32, 10)
    source_data = (
        128 * np.random.randn(num_channels, size[0], size[1], size[2])
    ).astype("uint8")

    ds = Dataset(tmp_path / "multi-channel-test", (1, 1, 1))
    l = ds.add_layer(
        "color",
        COLOR_CATEGORY,
        dtype_per_channel="uint8",
        num_channels=num_channels,
    )
    mag1 = l.add_mag("1", chunks_per_shard=32)

    print("writing source_data shape", source_data.shape)
    mag1.write(source_data)
    assert np.any(source_data != 0)

    mag2 = l._initialize_mag_from_other_mag("2", mag1, False)

    downsample_cube_job(
        (l.get_mag("1").get_view(), l.get_mag("2").get_view(), 0),
        Vec3Int(2, 2, 2),
        InterpolationModes.MAX,
        BUFFER_SHAPE,
    )

    channels = []
    for channel_index in range(num_channels):
        channels.append(
            downsample_cube(
                source_data[channel_index], [2, 2, 2], InterpolationModes.MAX
            )
        )
    joined_buffer = np.stack(channels)

    target_buffer = mag2.read()
    assert np.any(target_buffer != 0)
    assert np.all(target_buffer == joined_buffer)
def upsample_test_helper(tmp_path: Path, use_compress: bool) -> None:
    ds = Dataset(tmp_path, voxel_size=(10.5, 10.5, 5))
    layer = ds.add_layer("color", COLOR_CATEGORY)
    mag2 = layer.add_mag([2, 2, 2])

    offset = Vec3Int(WKW_CUBE_SIZE, 2 * WKW_CUBE_SIZE, 0)

    mag2.write(
        absolute_offset=offset,
        data=(np.random.rand(*BUFFER_SHAPE) * 255).astype(np.uint8),
    )
    mag1 = layer._initialize_mag_from_other_mag("1-1-2", mag2, use_compress)

    source_buffer = mag2.read(
        absolute_offset=offset,
        size=BUFFER_SHAPE,
    )[0]
    assert np.any(source_buffer != 0)

    upsample_cube_job(
        (
            mag2.get_view(absolute_offset=offset, size=BUFFER_SHAPE),
            mag1.get_view(
                absolute_offset=offset,
                size=BUFFER_SHAPE,
            ),
            0,
        ),
        # mag factors of the target (1-1-2) relative to the source (2-2-2);
        # their inverse, [2, 2, 1], is the replication factor checked below.
        [0.5, 0.5, 1.0],
        BUFFER_SHAPE,
    )
    assert np.any(source_buffer != 0)

    target_buffer = mag1.read(absolute_offset=offset, size=BUFFER_SHAPE)[0]
    assert np.any(target_buffer != 0)

    assert np.all(target_buffer == upsample_cube(source_buffer, [2, 2, 1]))
def tile_cubing(
    target_path: Path,
    layer_name: str,
    batch_size: int,
    input_path_pattern: str,
    voxel_size: Tuple[int, int, int],
    args: Optional[Namespace] = None,
) -> None:
    decimal_lengths = get_digit_counts_for_dimensions(input_path_pattern)
    (
        min_dimensions,
        max_dimensions,
        arbitrary_file,
        file_count,
    ) = detect_interval_for_dimensions(input_path_pattern, decimal_lengths)

    if not arbitrary_file:
        logging.error(
            f"No source files found. Maybe the input_path_pattern was wrong. You provided: {input_path_pattern}"
        )
        return

    # Determine tile size from first matching file
    tile_width, tile_height = image_reader.read_dimensions(arbitrary_file)
    num_z = max_dimensions["z"] - min_dimensions["z"] + 1
    num_x = (max_dimensions["x"] - min_dimensions["x"] + 1) * tile_width
    num_y = (max_dimensions["y"] - min_dimensions["y"] + 1) * tile_height
    x_offset = min_dimensions["x"] * tile_width
    y_offset = min_dimensions["y"] * tile_height
    num_channels = image_reader.read_channel_count(arbitrary_file)
    logging.info(
        "Found source files: count={} with tile_size={}x{}".format(
            file_count, tile_width, tile_height
        )
    )
    if args is None or not hasattr(args, "dtype") or args.dtype is None:
        dtype = image_reader.read_dtype(arbitrary_file)
    else:
        dtype = args.dtype

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"
    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_channels,
        )

    bbox = BoundingBox(
        Vec3Int(x_offset, y_offset, min_dimensions["z"]),
        Vec3Int(num_x, num_y, num_z),
    )
    if target_layer.bounding_box.volume() == 0:
        # If the layer is empty, we want to set the bbox directly because extending it
        # would mean that the bbox would always start at (0, 0, 0)
        target_layer.bounding_box = bbox
    else:
        target_layer.bounding_box = target_layer.bounding_box.extended_by(bbox)

    target_mag_view = target_layer.get_or_add_mag(
        Mag(1), block_len=DEFAULT_CHUNK_SIZE.z
    )

    with get_executor_for_args(args) as executor:
        job_args = []
        # Iterate over all z batches
        for z_batch in get_regular_chunks(
            min_dimensions["z"], max_dimensions["z"], DEFAULT_CHUNK_SIZE.z
        ):
            # The z_batch always starts and ends at a multiple of DEFAULT_CHUNK_SIZE.z.
            # However, we only want the part that is inside the bounding box
            z_batch = range(
                max(list(z_batch)[0], target_layer.bounding_box.topleft.z),
                min(list(z_batch)[-1] + 1, target_layer.bounding_box.bottomright.z),
            )
            z_values = list(z_batch)
            job_args.append(
                (
                    target_mag_view.get_view(
                        (x_offset, y_offset, z_values[0]),
                        (num_x, num_y, len(z_values)),
                    ),
                    z_values,
                    input_path_pattern,
                    batch_size,
                    (tile_width, tile_height, num_channels),
                    min_dimensions,
                    max_dimensions,
                    decimal_lengths,
                    dtype,
                    num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(tile_cubing_job, job_args),
            f"Tile cubing layer {layer_name}",
        )
        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(
                SegmentationLayer, target_layer
            ).largest_segment_id = largest_segment_id
def cubing(
    source_path: Path,
    target_path: Path,
    layer_name: str,
    batch_size: Optional[int],
    channel_index: Optional[int],
    sample_index: Optional[int],
    dtype: Optional[str],
    target_mag_str: str,
    data_format: DataFormat,
    chunk_size: Vec3Int,
    chunks_per_shard: Vec3Int,
    interpolation_mode_str: str,
    start_z: int,
    skip_first_z_slices: int,
    pad: bool,
    voxel_size: Tuple[float, float, float],
    executor_args: Namespace,
) -> Layer:
    source_files = find_source_filenames(source_path)

    all_num_x, all_num_y = zip(
        *[
            image_reader.read_dimensions(source_files[i])
            for i in range(len(source_files))
        ]
    )
    num_x = max(all_num_x)
    num_y = max(all_num_y)
    # All images are assumed to have equal channels and samples
    num_channels = image_reader.read_channel_count(source_files[0])
    num_samples = image_reader.read_sample_count(source_files[0])
    num_output_channels = num_channels * num_samples
    if channel_index is not None:
        # if there is no c axis, but someone meant to only use one channel/sample, set the sample index instead
        if sample_index is None and num_channels == 1 and channel_index > 0:
            sample_index = channel_index
            channel_index = 0

        assert (
            0 <= channel_index < num_channels
        ), "Selected channel is invalid. Please check the number of channels in the source file."
        num_output_channels = num_samples
    if sample_index is not None:
        # if no channel axis exists, it is valid to only set the sample index. Set channel index to 0 to avoid confusion
        if channel_index is None and num_channels == 1:
            channel_index = 0
        assert (
            channel_index is not None
        ), "Sample index is only valid if a channel index is also set."
        assert (
            0 <= sample_index < num_samples
        ), "Selected sample is invalid. Please check the number of samples in the source file."
        num_output_channels = 1

    num_z_slices_per_file = image_reader.read_z_slices_per_file(source_files[0])
    assert (
        num_z_slices_per_file == 1 or len(source_files) == 1
    ), "Multi page TIFF support only for single files"
    if num_z_slices_per_file > 1:
        num_z = num_z_slices_per_file
    else:
        num_z = len(source_files)

    if dtype is None:
        dtype = image_reader.read_dtype(source_files[0])

    if batch_size is None:
        batch_size = DEFAULT_CHUNK_SIZE.z

    target_mag = Mag(target_mag_str)

    target_ds = Dataset(target_path, voxel_size=voxel_size, exist_ok=True)
    is_segmentation_layer = layer_name == "segmentation"

    if is_segmentation_layer:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            SEGMENTATION_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_output_channels,
            largest_segment_id=0,
        )
    else:
        target_layer = target_ds.get_or_add_layer(
            layer_name,
            COLOR_CATEGORY,
            dtype_per_channel=dtype,
            num_channels=num_output_channels,
            data_format=data_format,
        )

    target_layer.bounding_box = target_layer.bounding_box.extended_by(
        BoundingBox(
            Vec3Int(0, 0, start_z + skip_first_z_slices) * target_mag,
            Vec3Int(num_x, num_y, num_z - skip_first_z_slices) * target_mag,
        )
    )

    target_mag_view = target_layer.get_or_add_mag(
        target_mag,
        chunks_per_shard=chunks_per_shard,
        chunk_size=chunk_size,
    )

    interpolation_mode = parse_interpolation_mode(
        interpolation_mode_str, target_layer.category
    )
    if target_mag != Mag(1):
        logging.info(
            f"Downsampling the cubed image to {target_mag} in memory with interpolation mode {interpolation_mode}."
        )

    logging.info("Found source files: count={} size={}x{}".format(num_z, num_x, num_y))

    with get_executor_for_args(executor_args) as executor:
        job_args = []
        # We iterate over all z sections
        for z in range(skip_first_z_slices, num_z, DEFAULT_CHUNK_SIZE.z):
            # The z is used to access the source files. However, when writing the
            # data, the `start_z` has to be considered.
            max_z = min(num_z, z + DEFAULT_CHUNK_SIZE.z)
            # Prepare source files array
            if len(source_files) > 1:
                source_files_array = source_files[z:max_z]
            else:
                source_files_array = source_files * (max_z - z)
            # Prepare job
            job_args.append(
                (
                    target_mag_view.get_view(
                        (0, 0, z + start_z),
                        (num_x, num_y, max_z - z),
                    ),
                    target_mag,
                    interpolation_mode,
                    source_files_array,
                    batch_size,
                    pad,
                    channel_index,
                    sample_index,
                    dtype,
                    target_layer.num_channels,
                )
            )

        largest_segment_id_per_chunk = wait_and_ensure_success(
            executor.map_to_futures(cubing_job, job_args),
            progress_desc=f"Cubing from {skip_first_z_slices} to {num_z}",
        )
        if is_segmentation_layer:
            largest_segment_id = max(largest_segment_id_per_chunk)
            cast(
                SegmentationLayer, target_layer
            ).largest_segment_id = largest_segment_id

    # Return layer
    return target_layer
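# Hypothetical call sketch for the cubing entry point above (all paths and values are
# illustrative assumptions, not taken from the original source). With dtype=None and
# batch_size=None the function falls back to the dtype of the first source file and to
# DEFAULT_CHUNK_SIZE.z, as implemented above.
def _example_cubing_call(executor_args: Namespace) -> Layer:
    return cubing(
        source_path=Path("testdata/tiff"),
        target_path=Path("testoutput/tiff_cubed"),
        layer_name="color",
        batch_size=None,
        channel_index=None,
        sample_index=None,
        dtype=None,
        target_mag_str="1",
        data_format=DataFormat.WKW,  # assumed data format
        chunk_size=Vec3Int.full(32),
        chunks_per_shard=Vec3Int.full(32),
        interpolation_mode_str="default",  # assumed to be accepted by parse_interpolation_mode
        start_z=0,
        skip_first_z_slices=0,
        pad=False,
        voxel_size=(11.24, 11.24, 25.0),  # assumed voxel size in nanometers
        executor_args=executor_args,
    )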
def test_compressed_downsample_cube_job(WT1_path: Path, tmp_path: Path) -> None:
    with warnings.catch_warnings():
        warnings.filterwarnings("error")  # This escalates the warning to an error
        downsample_test_helper(WT1_path, tmp_path, True, Vec3Int.full(32))
def test_downsample_cube_job(WT1_path: Path, tmp_path: Path) -> None:
    downsample_test_helper(WT1_path, tmp_path, False, Vec3Int.full(16))
import warnings

import numpy as np
import pytest

from webknossos import COLOR_CATEGORY, Dataset, Mag, Vec3Int
from webknossos.dataset._downsampling_utils import (
    InterpolationModes,
    _mode,
    calculate_default_coarsest_mag,
    calculate_mags_to_downsample,
    calculate_mags_to_upsample,
    downsample_cube,
    downsample_cube_job,
    non_linear_filter_3d,
)
from webknossos.dataset.sampling_modes import SamplingModes

BUFFER_SHAPE = Vec3Int.full(256)


def test_downsample_cube() -> None:
    buffer = np.zeros(BUFFER_SHAPE, dtype=np.uint8)
    buffer[:, :, :] = np.arange(0, BUFFER_SHAPE.x)

    output = downsample_cube(buffer, [2, 2, 2], InterpolationModes.MODE)

    assert np.all(output.shape == (BUFFER_SHAPE.to_np() // 2))
    assert buffer[0, 0, 0] == 0
    assert buffer[0, 0, 1] == 1
    assert np.all(output[:, :, :] == np.arange(0, BUFFER_SHAPE.x, 2))


def test_downsample_mode() -> None:
setup_logging(args)

if args.isotropic is not None:
    raise DeprecationWarning(
        "The flag 'isotropic' is deprecated. Consider using '--sampling_mode isotropic' instead."
    )

if args.anisotropic_target_mag is not None:
    raise DeprecationWarning(
        "The 'anisotropic_target_mag' flag is deprecated. Use '--max' instead (and consider changing the 'sampling_mode')"
    )

from_mag = Mag(args.from_mag)
target_mag = Mag(args.target_mag)
buffer_shape = (
    Vec3Int.full(args.buffer_cube_size)
    if args.buffer_cube_size is not None
    else None
)

upsample_mags(
    args.path,
    args.layer_name,
    from_mag,
    target_mag,
    buffer_shape=buffer_shape,
    compress=not args.no_compress,
    sampling_mode=args.sampling_mode,
    args=args,
)
args = create_parser().parse_args()
setup_logging(args)

if args.isotropic is not None:
    raise DeprecationWarning(
        "The flag 'isotropic' is deprecated. Consider using '--sampling_mode isotropic' instead."
    )

if args.anisotropic_target_mag is not None:
    raise DeprecationWarning(
        "The 'anisotropic_target_mag' flag is deprecated. Use '--max' instead (and consider changing the 'sampling_mode')"
    )

from_mag = Mag(args.from_mag)
max_mag = None if args.max is None else Mag(args.max)
buffer_shape = (
    Vec3Int.full(args.buffer_cube_size)
    if args.buffer_cube_size is not None
    else None
)

downsample_mags(
    path=args.path,
    layer_name=args.layer_name,
    from_mag=from_mag,
    max_mag=max_mag,
    interpolation_mode=args.interpolation_mode,
    compress=not args.no_compress,
    sampling_mode=args.sampling_mode,
    buffer_shape=buffer_shape,
    force_sampling_scheme=args.force_sampling_scheme,
    args=get_executor_args(args),
)