Example 1
def upsample_cube_job(
    args: Tuple[View, View, int],
    mag_factors: List[float],
    buffer_shape: Vec3Int,
) -> None:
    (source_view, target_view, _i) = args

    assert all(
        f <= 1 for f in mag_factors
    ), f"mag_factors ({mag_factors}) for upsampling must not be greater than 1"

    try:
        num_channels = target_view.info.num_channels
        target_size = target_view.bounding_box.in_mag(target_view.mag).size
        shape = (num_channels, ) + target_size.to_tuple()
        file_buffer = np.zeros(shape, target_view.get_dtype())

        tiles = product(*[
            range(math.ceil(length))
            for length in target_size.to_np() / buffer_shape.to_np()
        ])

        for tile in tiles:
            target_offset = Vec3Int(tile) * buffer_shape
            source_offset = _vec3int_mulf(target_offset, mag_factors)
            source_size = source_view.bounding_box.in_mag(source_view.mag).size
            source_size = _vec3int_mulf(
                buffer_shape, mag_factors).pairmin(source_size - source_offset)

            bbox = BoundingBox(source_offset, source_size)
            cube_buffer_channels = source_view.read(
                relative_bounding_box=bbox.from_mag_to_mag1(source_view.mag), )

            for channel_index in range(num_channels):
                cube_buffer = cube_buffer_channels[channel_index]

                if not np.all(cube_buffer == 0):
                    # Upsample the buffer
                    inverse_factors = [int(1 / f) for f in mag_factors]
                    data_cube = upsample_cube(cube_buffer, inverse_factors)

                    buffer_offset = target_offset
                    buffer_end = buffer_offset + data_cube.shape

                    file_buffer[channel_index, buffer_offset[0]:buffer_end[0],
                                buffer_offset[1]:buffer_end[1],
                                buffer_offset[2]:buffer_end[2], ] = data_cube

        # Write the upsampled buffer to target
        if source_view.info.num_channels == 1:
            file_buffer = file_buffer[0]  # remove channel dimension
        target_view.write(file_buffer)

    except Exception as exc:
        logging.error(
            f"Upsampling of target {target_view.bounding_box} failed with {exc}"
        )
        raise exc
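The helpers upsample_cube and _vec3int_mulf are used above but not defined in this snippet. The following is a minimal sketch of what they might look like, assuming nearest-neighbor upsampling and a component-wise float multiply that floors back to integer coordinates; it is illustrative, not necessarily the library's actual implementation.

from typing import List

import numpy as np
from webknossos.geometry import Vec3Int


def upsample_cube(cube_buffer: np.ndarray, factors: List[int]) -> np.ndarray:
    # Repeat every voxel `factor` times along each axis (nearest-neighbor upsampling).
    return np.kron(cube_buffer, np.ones(factors, dtype=cube_buffer.dtype))


def _vec3int_mulf(vec: Vec3Int, factors: List[float]) -> Vec3Int:
    # Multiply component-wise by float factors and floor back to integers.
    return Vec3Int(*(int(c * f) for c, f in zip(vec, factors)))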
Example 2
    def _parse_bounding_box(cls, bounding_box_element: Element) -> BoundingBox:

        topleft = (
            int(bounding_box_element.get("topLeftX", 0)),
            int(bounding_box_element.get("topLeftY", 0)),
            int(bounding_box_element.get("topLeftZ", 0)),
        )
        size = (
            int(bounding_box_element.get("width", 0)),
            int(bounding_box_element.get("height", 0)),
            int(bounding_box_element.get("depth", 0)),
        )
        color = None
        if bounding_box_element.get("color.r"):  # also checks for empty strings
            color = (
                float(enforce_not_null(bounding_box_element.get("color.r"))),
                float(enforce_not_null(bounding_box_element.get("color.g"))),
                float(enforce_not_null(bounding_box_element.get("color.b"))),
                float(enforce_not_null(bounding_box_element.get("color.a"))),
            )

        return BoundingBox(
            topleft,
            size,
            name=bounding_box_element.get("name"),
            is_visible=bounding_box_element.get("isVisible", "true") == "true",
            id=bounding_box_element.get("id"),
            color=color,
        )
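A minimal usage sketch for the parser above. NmlParser is a hypothetical host class standing in for whatever class this classmethod is defined on; the attribute names follow the element accesses in the code.

from xml.etree.ElementTree import fromstring

element = fromstring(
    '<userBoundingBox id="1" name="Bounding box 1" isVisible="true" '
    'topLeftX="371" topLeftY="4063" topLeftZ="1676" '
    'width="891" height="579" depth="232" />'
)
bbox = NmlParser._parse_bounding_box(element)  # NmlParser is hypothetical
assert bbox.topleft.to_tuple() == (371, 4063, 1676)
assert bbox.size.to_tuple() == (891, 579, 232)
assert bbox.color is None  # no color.* attributes present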
Example 3
def test_buffered_slice_reader_along_different_axis(tmp_path: Path) -> None:
    test_cube = (np.random.random((3, 13, 13, 13)) * 100).astype(np.uint8)
    cube_size_without_channel = Vec3Int(test_cube.shape[1:])
    offset = Vec3Int(5, 10, 20)

    for dim in [0, 1, 2]:
        ds = Dataset(tmp_path / f"buffered_slice_reader_{dim}",
                     voxel_size=(1, 1, 1))
        mag_view = ds.add_layer("color", COLOR_CATEGORY,
                                num_channels=3).add_mag(1)
        mag_view.write(test_cube, absolute_offset=offset)

        with mag_view.get_buffered_slice_reader(
                buffer_size=5,
                dimension=dim) as reader_a, mag_view.get_buffered_slice_reader(
                    absolute_bounding_box=BoundingBox(
                        offset, cube_size_without_channel),
                    buffer_size=5,
                    dimension=dim,
                ) as reader_b:
            i = 0
            for slice_data_a, slice_data_b in zip(reader_a, reader_b):
                if dim == 0:
                    original_slice = test_cube[:, i, :, :]
                elif dim == 1:
                    original_slice = test_cube[:, :, i, :]
                else:  # dim == 2
                    original_slice = test_cube[:, :, :, i]
                i += 1

                assert np.array_equal(slice_data_a, original_slice)
                assert np.array_equal(slice_data_b, original_slice)
Example 4
    def bounding_box(self, bbox: BoundingBox) -> None:
        """
        Updates the offset and size of the bounding box of this layer in the properties.
        """
        self.dataset._ensure_writable()
        assert (
            bbox.topleft.is_positive()
        ), f"Updating the bounding box of layer {self} to {bbox} failed, topleft must not contain negative dimensions."
        self._properties.bounding_box = bbox
        self.dataset._export_as_json()
        for mag in self.mags.values():
            mag._array.ensure_size(
                bbox.align_with_mag(mag.mag).in_mag(mag.mag).bottomright)
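A short usage sketch of this setter, assuming layer is a Layer of a writable Dataset (names purely illustrative):

# Enlarging the bounding box also ensures the backing arrays of all mags are large enough.
layer.bounding_box = BoundingBox((0, 0, 0), (1024, 1024, 512))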
Example 5
def main(args: argparse.Namespace) -> None:

    # Use the skeleton API to read the bounding boxes once
    # https://github.com/scalableminds/webknossos-libs/issues/482 is done.
    nml_regex = re.compile(
        r'<userBoundingBox .*name="Limits of flood-fill \(source_id=(\d+), target_id=(\d+), seed=([\d,]+), timestamp=(\d+)\)".*topLeftX="(\d+)" topLeftY="(\d+)" topLeftZ="(\d+)" width="(\d+)" height="(\d+)" depth="(\d+)" />'
    )

    bboxes: List[FloodFillBbox] = []
    with open(args.nml_path, "r", encoding="utf-8") as nml_file:
        lines = nml_file.readlines()
    for line in lines:
        matches = nml_regex.findall(line)
        for match in matches:
            # each match is a tuple of (source_id, target_id, seed, timestamp, top_left_x, top_left_y, top_left_z, width, height, depth)
            bboxes.append(
                FloodFillBbox(
                    bounding_box=BoundingBox((match[4], match[5], match[6]),
                                             (match[7], match[8], match[9])),
                    seed_position=Vec3Int(match[2].split(",")),
                    source_id=int(match[0]),
                    target_id=int(match[1]),
                    timestamp=int(match[3]),
                ))
    bboxes = sorted(bboxes, key=lambda x: x.timestamp)

    time_start("Merge with fallback layer")
    data_mag = merge_with_fallback_layer(
        args.output_path,
        args.volume_path,
        args.segmentation_layer_path,
    )
    time_stop("Merge with fallback layer")

    time_start("All floodfills")
    for floodfill in bboxes:
        time_start("Floodfill")
        execute_floodfill(
            data_mag,
            floodfill.seed_position,
            floodfill.bounding_box,
            floodfill.source_id,
            floodfill.target_id,
        )
        time_stop("Floodfill")
    time_stop("All floodfills")

    time_start("Recompute downsampled mags")
    data_mag.layer.redownsample()
    time_stop("Recompute downsampled mags")
Example 6

    def check_properties(annotation: wk.Annotation) -> None:
        assert len(annotation.user_bounding_boxes) == 2
        assert annotation.user_bounding_boxes[0].topleft.x == 2371
        assert annotation.user_bounding_boxes[0].name == "Bounding box 1"
        assert annotation.user_bounding_boxes[0].is_visible
        assert annotation.user_bounding_boxes[0].id == "1"

        assert annotation.user_bounding_boxes[1] == BoundingBox(
            (371, 4063, 1676), (891, 579, 232))
        assert annotation.user_bounding_boxes[1].name == "Bounding box 2"
        assert not annotation.user_bounding_boxes[1].is_visible
        assert annotation.user_bounding_boxes[1].color == (
            0.2705882489681244,
            0.6470588445663452,
            0.19607843458652496,
            1.0,
        )
Example 7

    def _get_slice_generator(self) -> Generator[np.ndarray, None, None]:
        for batch in get_chunks(
            list(
                range(
                    self.bbox_current_mag.topleft[self.dimension],
                    self.bbox_current_mag.bottomright[self.dimension],
                )
            ),
            self.buffer_size,
        ):
            n_slices = len(batch)
            batch_start_idx = batch[0]

            assert (
                n_slices <= self.buffer_size
            ), f"n_slices should be at most buffer_size, but {n_slices} > {self.buffer_size}"

            bbox_offset = self.bbox_current_mag.topleft
            bbox_size = self.bbox_current_mag.size

            buffer_bounding_box = BoundingBox.from_tuple2(
                (
                    bbox_offset[: self.dimension]
                    + (batch_start_idx,)
                    + bbox_offset[self.dimension + 1 :],
                    bbox_size[: self.dimension]
                    + (n_slices,)
                    + bbox_size[self.dimension + 1 :],
                )
            )

            if self.use_logging:
                info(
                    f"({getpid()}) Reading {n_slices} slices at position {batch_start_idx}."
                )
            data = self.view.read(
                absolute_bounding_box=buffer_bounding_box.from_mag_to_mag1(
                    self.view.mag
                )
            )

            for current_slice in np.rollaxis(
                data, self.dimension + 1
            ):  # The '+1' is important because the first dimension is the channel
                yield current_slice
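The tuple splicing above replaces a single component of the offset and size with the batch start and slice count. A toy illustration for dimension = 2, using plain tuples for clarity:

bbox_offset = (10, 20, 30)
bbox_size = (64, 64, 64)
dimension = 2
batch_start_idx, n_slices = 7, 5

spliced_offset = bbox_offset[:dimension] + (batch_start_idx,) + bbox_offset[dimension + 1:]
spliced_size = bbox_size[:dimension] + (n_slices,) + bbox_size[dimension + 1:]

assert spliced_offset == (10, 20, 7)
assert spliced_size == (64, 64, 5)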
Example 8

    def __init__(
        self,
        view: "View",
        offset: Optional[Vec3IntLike] = None,
        size: Optional[Vec3IntLike] = None,
        # buffer_size specifies how many slices are read and buffered at once.
        buffer_size: int = 32,
        dimension: int = 2,  # z
        *,
        relative_bounding_box: Optional[BoundingBox] = None,  # in mag1
        absolute_bounding_box: Optional[BoundingBox] = None,  # in mag1
        use_logging: bool = False,
    ) -> None:
        """see `View.get_buffered_slice_reader()`"""

        self.view = view
        self.buffer_size = buffer_size
        self.dtype = self.view.get_dtype()
        assert 0 <= dimension <= 2
        self.dimension = dimension
        self.use_logging = use_logging
        if offset is not None and size is not None:
            warnings.warn(
                "[DEPRECATION] Using offset and size for a buffered slice reader is deprecated. "
                + "Please use the parameter relative_bounding_box or absolute_bounding_box in Mag(1) instead.",
                DeprecationWarning,
            )
            assert relative_bounding_box is None and absolute_bounding_box is None
            absolute_bounding_box = BoundingBox(offset, size).from_mag_to_mag1(view.mag)
            offset = None
            size = None

        assert (
            offset is None and size is None
        ), "You have to set both offset and size or neither of them."
        if relative_bounding_box is None and absolute_bounding_box is None:
            absolute_bounding_box = view.bounding_box
        if relative_bounding_box is not None:
            assert absolute_bounding_box is None
            absolute_bounding_box = relative_bounding_box.offset(
                view.bounding_box.topleft
            )

        assert absolute_bounding_box is not None
        self.bbox_current_mag = absolute_bounding_box.in_mag(view.mag)
Example 9
def merge_with_fallback_layer(
    output_path: Path,
    volume_annotation_path: Path,
    segmentation_layer_path: Path,
) -> MagView:

    assert not output_path.exists(), f"Dataset at {output_path} already exists"

    # Prepare output dataset by creating a shallow copy of the dataset
    # determined by segmentation_layer_path, but do a deep copy of
    # segmentation_layer_path itself (so that we can mutate it).
    input_segmentation_dataset = wk.Dataset.open(
        segmentation_layer_path.parent)
    time_start("Prepare output dataset")
    output_dataset = input_segmentation_dataset.shallow_copy_dataset(
        output_path,
        name=output_path.name,
        make_relative=True,
        layers_to_ignore=[segmentation_layer_path.name],
    )
    output_layer = output_dataset.add_copy_layer(segmentation_layer_path,
                                                 segmentation_layer_path.name)
    time_stop("Prepare output dataset")

    input_segmentation_mag = input_segmentation_dataset.get_layer(
        segmentation_layer_path.name).get_finest_mag()
    with temporary_annotation_view(
            volume_annotation_path) as input_annotation_layer:
        input_annotation_mag = input_annotation_layer.get_finest_mag()
        bboxes = [
            bbox.in_mag(input_annotation_mag._mag)
            for bbox in input_annotation_mag.get_bounding_boxes_on_disk()
        ]
        output_mag = output_layer.get_mag(input_segmentation_mag.mag)

        cube_size = output_mag.info.chunk_size[
            0] * output_mag.info.chunks_per_shard[0]
        chunks_with_bboxes = BoundingBox.group_boxes_with_aligned_mag(
            bboxes, Mag(cube_size))

        assert (input_annotation_mag.info.chunks_per_shard == Vec3Int.ones()
                ), "volume annotation must have file_len=1"
        assert (input_annotation_mag.info.voxel_type ==
                input_segmentation_mag.info.voxel_type
                ), "Volume annotation must have same dtype as fallback layer"

        chunk_count = 0
        for chunk, bboxes in chunks_with_bboxes.items():
            chunk_count += 1
            logger.info(f"Processing chunk {chunk_count}...")

            time_start("Read chunk")
            data_buffer = output_mag.read(chunk.topleft,
                                          chunk.size)[0, :, :, :]
            time_stop("Read chunk")

            time_start("Read/merge bboxes")
            for bbox in bboxes:
                read_data = input_annotation_mag.read(bbox.topleft, bbox.size)
                data_buffer[bbox.offset(
                    -chunk.topleft).to_slices()] = read_data
            time_stop("Read/merge bboxes")

            time_start("Write chunk")
            output_mag.write(data_buffer, chunk.topleft)
            time_stop("Write chunk")
    return output_mag
Example 10
def execute_floodfill(
    data_mag: MagView,
    seed_position: Vec3Int,
    already_processed_bbox: BoundingBox,
    source_id: int,
    target_id: int,
) -> None:
    cube_size = data_mag.info.shard_size
    cube_bbox = BoundingBox(Vec3Int(0, 0, 0), cube_size)
    chunk_with_relative_seed: List[Tuple[Vec3Int, Vec3Int]] = [
        get_chunk_pos_and_offset(seed_position, cube_size)
    ]

    # The `is_visited` variable is used to know which parts of the already processed bbox
    # were already traversed. Outside of that bounding box, the actual data already
    # is an indicator of whether the flood-fill has reached a voxel.
    is_visited = np.zeros(already_processed_bbox.size.to_tuple(),
                          dtype=np.uint8)
    chunk_count = 0

    while len(chunk_with_relative_seed) > 0:
        chunk_count += 1
        if chunk_count % 10000 == 0:
            logger.info(f"Handled seed positions {chunk_count}")

        dirty_bucket = False
        current_cube, relative_seed = chunk_with_relative_seed.pop()
        global_seed = current_cube + relative_seed

        # Only reading one voxel for the seed can be up to 30,000 times faster
        # which is very relevant, since the chunk doesn't need to be traversed
        # if the seed voxel was already covered.
        value_at_seed_position = data_mag.read(current_cube + relative_seed,
                                               (1, 1, 1))

        if value_at_seed_position == source_id or (
                already_processed_bbox.contains(global_seed)
                and value_at_seed_position == target_id and
                not is_visited[global_seed - already_processed_bbox.topleft]):
            logger.info(
                f"Handling chunk {chunk_count} with current cube {current_cube}"
            )
            time_start("read data")
            cube_data = data_mag.read(current_cube, cube_size)
            cube_data = cube_data[0, :, :, :]
            time_stop("read data")

            seeds_in_current_chunk: Set[Vec3Int] = set()
            seeds_in_current_chunk.add(relative_seed)

            time_start("traverse cube")
            while len(seeds_in_current_chunk) > 0:
                current_relative_seed = seeds_in_current_chunk.pop()
                current_global_seed = current_cube + current_relative_seed
                if already_processed_bbox.contains(current_global_seed):
                    is_visited[current_global_seed -
                               already_processed_bbox.topleft] = 1

                if cube_data[current_relative_seed] != target_id:
                    cube_data[current_relative_seed] = target_id
                    dirty_bucket = True

                # check neighbors
                for neighbor in NEIGHBORS:
                    neighbor_pos = current_relative_seed + neighbor

                    global_neighbor_pos = current_cube + neighbor_pos
                    if already_processed_bbox.contains(global_neighbor_pos):
                        if is_visited[global_neighbor_pos -
                                      already_processed_bbox.topleft]:
                            continue
                    if cube_bbox.contains(neighbor_pos):
                        if cube_data[neighbor_pos] == source_id or (
                                already_processed_bbox.contains(
                                    global_neighbor_pos)
                                and cube_data[neighbor_pos] == target_id):
                            seeds_in_current_chunk.add(neighbor_pos)
                    else:
                        chunk_with_relative_seed.append(
                            get_chunk_pos_and_offset(global_neighbor_pos,
                                                     cube_size))
            time_stop("traverse cube")

            if dirty_bucket:
                time_start("write chunk")
                data_mag.write(cube_data, current_cube)
                time_stop("write chunk")
Example 11

def download_dataset(
    dataset_name: str,
    organization_id: str,
    sharing_token: Optional[str] = None,
    bbox: Optional[BoundingBox] = None,
    layers: Optional[List[str]] = None,
    mags: Optional[List[Mag]] = None,
    path: Optional[Union[PathLike, str]] = None,
    exist_ok: bool = False,
) -> Dataset:
    context = _get_context()
    client = context.generated_client

    dataset_info_response = dataset_info.sync_detailed(
        organization_name=organization_id,
        data_set_name=dataset_name,
        client=client,
        sharing_token=sharing_token,
    )
    assert dataset_info_response.status_code == 200, dataset_info_response
    parsed = dataset_info_response.parsed
    assert parsed is not None

    datastore_client = context.get_generated_datastore_client(
        parsed.data_store.url)
    optional_datastore_token = sharing_token or context.datastore_token

    actual_path = Path(dataset_name) if path is None else Path(path)
    if actual_path.exists():
        logger.warning(f"{actual_path} already exists, skipping download.")
        return Dataset.open(actual_path)

    voxel_size = cast(Tuple[float, float, float],
                      tuple(parsed.data_source.scale))
    data_layers = parsed.data_source.data_layers
    dataset = Dataset(actual_path,
                      name=parsed.name,
                      voxel_size=voxel_size,
                      exist_ok=exist_ok)
    for layer_name in layers or [i.name for i in data_layers]:

        response_layers = [i for i in data_layers if i.name == layer_name]
        assert (
            len(response_layers) > 0
        ), f"The provided layer name {layer_name} could not be found in the requested dataset."
        assert (
            len(response_layers) == 1
        ), f"The provided layer name {layer_name} was found multiple times in the requested dataset."
        response_layer = response_layers[0]
        category = cast(LayerCategoryType, response_layer.category)
        layer = dataset.add_layer(
            layer_name=layer_name,
            category=category,
            dtype_per_layer=response_layer.element_class,
            num_channels=3 if response_layer.element_class == "uint24" else 1,
            largest_segment_id=response_layer.additional_properties.get(
                "largestSegmentId", None),
        )

        default_view_configuration_dict = None
        if not isinstance(response_layer.default_view_configuration, Unset):
            default_view_configuration_dict = (
                response_layer.default_view_configuration.to_dict())

        if default_view_configuration_dict is not None:
            default_view_configuration = dataset_converter.structure(
                default_view_configuration_dict, LayerViewConfiguration)
            layer.default_view_configuration = default_view_configuration

        if bbox is None:
            response_bbox = response_layer.bounding_box
            layer.bounding_box = BoundingBox(
                response_bbox.top_left,
                (response_bbox.width, response_bbox.height,
                 response_bbox.depth),
            )
        else:
            assert isinstance(
                bbox, BoundingBox
            ), f"Expected a BoundingBox object for the bbox parameter but got {type(bbox)}"
            layer.bounding_box = bbox
        if mags is None:
            mags = [Mag(mag) for mag in response_layer.resolutions]
        for mag in mags:
            mag_view = layer.get_or_add_mag(
                mag,
                compress=True,
                chunk_size=Vec3Int.full(32),
                chunks_per_shard=_DOWNLOAD_CHUNK_SIZE // 32,
            )
            aligned_bbox = layer.bounding_box.align_with_mag(mag, ceil=True)
            download_chunk_size_in_mag = _DOWNLOAD_CHUNK_SIZE * mag.to_vec3_int(
            )
            for chunk in track(
                    list(
                        aligned_bbox.chunk(download_chunk_size_in_mag,
                                           download_chunk_size_in_mag)),
                    description=f"Downloading layer={layer.name} mag={mag}",
            ):
                chunk_in_mag = chunk.in_mag(mag)
                response = dataset_download.sync_detailed(
                    organization_name=organization_id,
                    data_set_name=dataset_name,
                    data_layer_name=layer_name,
                    mag=mag.to_long_layer_name(),
                    client=datastore_client,
                    token=optional_datastore_token,
                    x=chunk.topleft.x,
                    y=chunk.topleft.y,
                    z=chunk.topleft.z,
                    width=chunk_in_mag.size.x,
                    height=chunk_in_mag.size.y,
                    depth=chunk_in_mag.size.z,
                )
                assert response.status_code == 200, response
                assert (
                    response.headers["missing-buckets"] == "[]"
                ), f"Download contained missing buckets {response.headers['missing-buckets']}."
                data = np.frombuffer(response.content,
                                     dtype=layer.dtype_per_channel).reshape(
                                         layer.num_channels,
                                         *chunk_in_mag.size,
                                         order="F")
                mag_view.write(data, absolute_offset=chunk.topleft)
    return dataset
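A usage sketch for download_dataset; the dataset and organization names below are hypothetical:

ds = download_dataset(
    dataset_name="l4_sample",
    organization_id="sample_organization",
    layers=["color"],
    mags=[Mag(1)],
    path="l4_sample_local",
)
print(ds.get_layer("color").bounding_box)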
Example 12
class DatasetProperties:
    id: Dict[str, str]
    scale: Tuple[float, float, float]
    data_layers: List[Union[SegmentationLayerProperties, LayerProperties]]
    default_view_configuration: Optional[DatasetViewConfiguration] = None


# --- Converter --------------------

dataset_converter = cattr.Converter()

# register (un-)structure hooks for non-attr-classes
bbox_to_wkw: Callable[[BoundingBox], dict] = lambda o: o.to_wkw_dict()
dataset_converter.register_unstructure_hook(BoundingBox, bbox_to_wkw)
dataset_converter.register_structure_hook(
    BoundingBox, lambda d, _: BoundingBox.from_wkw_dict(d))


def mag_unstructure(mag: Mag) -> List[int]:
    return mag.to_list()


dataset_converter.register_unstructure_hook(Mag, mag_unstructure)
dataset_converter.register_structure_hook(Mag, lambda d, _: Mag(d))

vec3int_to_array: Callable[[Vec3Int], List[int]] = lambda o: o.to_list()
dataset_converter.register_unstructure_hook(Vec3Int, vec3int_to_array)
dataset_converter.register_structure_hook(
    Vec3Int,
    lambda d, _: Vec3Int.full(d) if isinstance(d, int) else Vec3Int(d),
)
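A quick round-trip sketch using the hooks registered above (assuming BoundingBox.to_wkw_dict and BoundingBox.from_wkw_dict are inverses, which the hook pair implies):

bbox = BoundingBox((0, 0, 0), (16, 16, 16))

# Unstructuring goes through bbox_to_wkw, structuring through BoundingBox.from_wkw_dict.
wkw_dict = dataset_converter.unstructure(bbox)
assert dataset_converter.structure(wkw_dict, BoundingBox) == bbox

# The Vec3Int hook broadcasts a bare int via Vec3Int.full and passes sequences through.
assert dataset_converter.structure(32, Vec3Int) == Vec3Int.full(32)
assert dataset_converter.structure([4, 4, 1], Vec3Int) == Vec3Int(4, 4, 1)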