def test_sparse_boxes_WITH_OFFSET():
    """
    Check SparseBlockMask.sparse_boxes() for a mask whose box does not start
    at the origin, comparing both the logical and the physical boxes.
    """
    resolution = 10
    mask = np.zeros((5, 6, 7), dtype=bool)

    # The mask starts at X=20, so these five blocks cover physical X 20-70,
    # which touches three 30-wide bricks (logical X 0-90).
    mask[0, 0, 0:5] = True

    # These three blocks cover physical X 30-60, i.e. exactly one brick
    # (logical X 30-60).
    mask[0, 1, 1:4] = True

    # MASK STARTS AT OFFSET
    box_start = np.array([0, 10, 20])
    box_stop = box_start + 10 * np.array(mask.shape)

    grid = Grid((10, 10, 30), (0, 0, 0))
    sbm = SparseBlockMask(mask, (box_start, box_stop), resolution)

    # Logical boxes are the full grid bricks touched by the mask.
    expected_logical = [[[0, 10, 0], [10, 20, 30]],
                        [[0, 10, 30], [10, 20, 60]],
                        [[0, 10, 60], [10, 20, 90]],
                        [[0, 20, 30], [10, 30, 60]]]
    logical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=True)
    assert (logical_boxes == expected_logical).all()

    # Physical boxes are the same bricks, shrunk to the masked extent.
    expected_physical = [[[0, 10, 20], [10, 20, 30]],
                         [[0, 10, 30], [10, 20, 60]],
                         [[0, 10, 60], [10, 20, 70]],
                         [[0, 20, 30], [10, 30, 60]]]
    physical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=False)
    assert (physical_boxes == expected_physical).all()
def test_sparse_boxes_NO_OFFSET():
    """
    Check SparseBlockMask.sparse_boxes() for a mask whose box starts at the
    origin, comparing both the logical and the physical boxes.
    """
    resolution = 10
    mask = np.zeros((5, 6, 7), dtype=bool)

    # Five blocks along X in the first row (physical X 0-50).
    mask[0, 0, 0:5] = True

    # Three blocks along X in the second row (physical X 10-40).
    mask[0, 1, 1:4] = True

    # MASK STARTS AT ORIGIN (NO OFFSET)
    box_start = np.array([0, 0, 0])
    box_stop = box_start + 10 * np.array(mask.shape)

    grid = Grid((10, 10, 30))
    sbm = SparseBlockMask(mask, (box_start, box_stop), resolution)

    # Logical boxes are the full grid bricks touched by the mask.
    expected_logical = [[[0, 0, 0], [10, 10, 30]],
                        [[0, 0, 30], [10, 10, 60]],
                        [[0, 10, 0], [10, 20, 30]],
                        [[0, 10, 30], [10, 20, 60]]]
    logical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=True)
    assert (logical_boxes == expected_logical).all()

    # Physical boxes are the same bricks, shrunk to the masked extent.
    expected_physical = [[[0, 0, 0], [10, 10, 30]],
                         [[0, 0, 30], [10, 10, 50]],
                         [[0, 10, 10], [10, 20, 30]],
                         [[0, 10, 30], [10, 20, 40]]]
    physical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=False)
    assert (physical_boxes == expected_physical).all()
Beispiel #3
0
    def init_boxes(self, volume_service, roi):
        """
        Return the boxes to process, optionally restricted to a DVID ROI.

        Args:
            volume_service:
                The input service.  Its ``bounding_box_zyx``,
                ``preferred_message_shape`` and ``base_service`` are read here.
            roi:
                dict with keys "name", "server", "uuid" and "scale".
                If "name" is empty, the whole bounding box is tiled.
                Missing "server"/"uuid" values are filled in from
                ``volume_service`` (the dict is modified in place).

        Returns:
            Array of boxes, clipped to the service's true bounding box.
        """
        if not roi["name"]:
            # No ROI: tile the entire bounding box with the brick grid.
            grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                         volume_service.preferred_message_shape,
                                         clipped=True)
            return np.array([*grid_boxes])

        base_service = volume_service.base_service

        roi_location_given = roi["server"] and roi["uuid"]
        if not roi_location_given:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        # An explicitly configured scale wins; otherwise infer it from the service.
        scale = roi["scale"]
        if scale is None:
            if isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"
            else:
                scale = 0

        # Width of one ROI (scale-5) block, measured in the service's voxels.
        roi_block_width = 2**(5 - scale)

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % roi_block_width).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = round_box(volume_service.bounding_box_zyx, roi_block_width)
        seg_box_s0 = seg_box * 2**scale
        seg_box_s5 = seg_box // roi_block_width

        timer_msg = f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})"
        with Timer(timer_msg, logger):
            roi_mask_s5, _ = fetch_roi(roi["server"],
                                       roi["uuid"],
                                       roi["name"],
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, roi_block_width)
        boxes = sbm.sparse_boxes(brick_shape)

        # Clip boxes to the true (not rounded) bounding box
        true_start, true_stop = volume_service.bounding_box_zyx
        boxes[:, 0] = np.maximum(boxes[:, 0], true_start)
        boxes[:, 1] = np.minimum(boxes[:, 1], true_stop)
        return boxes
Beispiel #4
0
    def init_boxes(self, volume_service, roi):
        """
        Return the boxes to process, optionally restricted to a DVID ROI.

        Args:
            volume_service:
                The input service.  Its ``bounding_box_zyx``,
                ``preferred_message_shape`` and ``base_service`` are read here.
            roi:
                Name of the ROI to fetch from the base service's server/uuid,
                or a falsy value to tile the whole bounding box.

        Returns:
            Array of boxes, clipped to the service's true bounding box.
        """
        if not roi:
            # No ROI: tile the entire bounding box with the brick grid.
            grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                         volume_service.preferred_message_shape,
                                         clipped=True)
            return np.array([*grid_boxes])

        base_service = volume_service.base_service
        assert isinstance(base_service, DvidVolumeService), \
            "Can't specify an ROI unless you're using a dvid input"

        assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
            "The 'roi' option doesn't support adapters other than 'rescale-level'"

        if isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"
        else:
            scale = 0

        # The ROI lives in the same repo as the segmentation instance.
        server, uuid, _ = base_service.instance_triple

        # Width of one ROI (scale-5) block, measured in the service's voxels.
        roi_block_width = 2**(5 - scale)

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % roi_block_width).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = round_box(volume_service.bounding_box_zyx, roi_block_width)
        seg_box_s0 = seg_box * 2**scale
        seg_box_s5 = seg_box // roi_block_width

        timer_msg = f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})"
        with Timer(timer_msg, logger):
            roi_mask_s5, _ = fetch_roi(server,
                                       uuid,
                                       roi,
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, roi_block_width)
        boxes = sbm.sparse_boxes(brick_shape)

        # Clip boxes to the true (not rounded) bounding box
        true_start, true_stop = volume_service.bounding_box_zyx
        boxes[:, 0] = np.maximum(boxes[:, 0], true_start)
        boxes[:, 1] = np.minimum(boxes[:, 1], true_stop)
        return boxes
def test_get_fullres_mask():
    """
    Check SparseBlockMask.get_fullres_mask() against an upsampled copy of a
    random coarse mask, for both an exact and an oversized request box.
    """
    lowres = np.random.randint(2, size=(10, 10), dtype=bool)
    fullres = upsample(lowres, 10)
    sbm = SparseBlockMask(lowres, [(0, 0), (100, 100)], (10, 10))

    # Requesting exactly the mask's own box reproduces the upsampled mask.
    exact = sbm.get_fullres_mask([(0, 0), (100, 100)])
    assert (exact == fullres).all()

    # A request that extends beyond the mask's box is padded with zeros:
    # only the overlapping corner carries mask data.
    oversized = sbm.get_fullres_mask([(10, 20), (150, 150)])
    assert oversized.shape == (140, 130)

    padded = np.zeros((140, 130), dtype=bool)
    padded[:90, :80] = fullres[10:, 20:]
    assert (oversized == padded).all()
    def sparse_block_mask_for_labels(self, labels, clip=True):
        """
        Determine which bricks (each with our ``preferred_message_shape``)
        would need to be accessed to download all data for the given labels,
        and return the result as a ``SparseBlockMask`` object.

        The brick coordinates are obtained from
        ``self.sparse_brick_coords_for_labels()``; see that method for
        details of how they are fetched.

        Args:
            labels:
                A list of body IDs (if ``self.supervoxels`` is False),
                or supervoxel IDs (if ``self.supervoxels`` is True).

            clip:
                If True, filter the results to exclude any coordinates
                that fall outside this service's bounding-box.
                Otherwise, all brick coordinates that encompass the given label groups
                will be returned, whether or not they fall within the bounding box.

        Returns:
            ``SparseBlockMask``
        """
        from neuclease.util import SparseBlockMask

        zyx = ['z', 'y', 'x']

        brick_df = self.sparse_brick_coords_for_labels(labels, clip)
        brick_df.drop_duplicates(zyx, inplace=True)

        # Convert voxel coordinates into brick indexes (in place).
        brick_shape = self.preferred_message_shape
        brick_df[zyx] //= brick_shape

        return SparseBlockMask.create_from_lowres_coords(brick_df[zyx].values,
                                                         brick_shape)
Beispiel #7
0
    def init_boxes(self, volume_service, roi, chunk_shape_s0):
        """
        Return a set of bounding boxes to tile the given ROI.
        Scale 0 of the volume service should correspond to full-res data,
        which is 32x higher-res than ROI resolution.

        Args:
            volume_service:
                The input service.  Its ``bounding_box_zyx`` and
                ``base_service`` are read here.
            roi:
                dict with keys "name", "server" and "uuid".
                If "name" is empty, the whole bounding box is tiled.
                Missing "server"/"uuid" values are filled in from
                ``volume_service`` (the dict is modified in place).
            chunk_shape_s0:
                Shape of the tiles to produce.

        Returns:
            Array of boxes, clipped to the service's true bounding box.
        """
        if not roi["name"]:
            # No ROI: tile the entire bounding box with the chunk grid.
            grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                         chunk_shape_s0,
                                         clipped=True)
            return np.array([*grid_boxes])

        base_service = volume_service.base_service

        roi_location_given = roi["server"] and roi["uuid"]
        if not roi_location_given:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        # Width of one ROI (scale-5) block in scale-0 voxels.
        roi_block_width = 2**5

        assert not (chunk_shape_s0 % roi_block_width).any(), \
            "If using an ROI, select a chunk shape that is divisible by 32"

        seg_box_s0 = round_box(volume_service.bounding_box_zyx, roi_block_width)
        seg_box_s5 = seg_box_s0 // roi_block_width

        timer_msg = f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})"
        with Timer(timer_msg, logger):
            roi_mask_s5, _ = fetch_roi(roi["server"],
                                       roi["uuid"],
                                       roi["name"],
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box_s0, roi_block_width)
        boxes = sbm.sparse_boxes(chunk_shape_s0)

        # Clip boxes to the true (not rounded) bounding box
        true_start, true_stop = volume_service.bounding_box_zyx
        boxes[:, 0] = np.maximum(boxes[:, 0], true_start)
        boxes[:, 1] = np.minimum(boxes[:, 1], true_stop)
        return boxes
    def init_brickwall(self, volume_service, subset_labels, roi):
        """
        Create the BrickWall for the input volume, optionally restricted by
        an ROI mask or by the blocks occupied by a subset of labels.

        Args:
            volume_service:
                The input service.
            subset_labels:
                Labels used to build a sparse block mask (only consulted when
                no ROI name is given).  May be empty.
            roi:
                dict with keys "name", "server", "uuid" and "scale".
                Missing "server"/"uuid" values are filled in from
                ``volume_service`` (the dict is modified in place).

        Returns:
            The result of ``BrickWall.from_volume_service()``.

        Raises:
            RuntimeError: if the mask for ``subset_labels`` is empty along any axis.
        """
        sbm = None

        if roi["name"]:
            base_service = volume_service.base_service

            roi_location_given = roi["server"] and roi["uuid"]
            if not roi_location_given:
                assert isinstance(base_service, DvidVolumeService), \
                    "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

            roi["server"] = (roi["server"] or volume_service.server)
            roi["uuid"] = (roi["uuid"] or volume_service.uuid)

            # An explicitly configured scale wins; otherwise infer it from the service.
            scale = roi["scale"]
            if scale is None:
                if isinstance(volume_service, ScaledVolumeService):
                    scale = volume_service.scale_delta
                    assert scale <= 5, \
                        "The 'roi' option doesn't support volumes downscaled beyond level 5"
                else:
                    scale = 0

            # Width of one ROI (scale-5) block, measured in the service's voxels.
            roi_block_width = 2**(5-scale)

            brick_shape = volume_service.preferred_message_shape
            assert not (brick_shape % roi_block_width).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = round_box(volume_service.bounding_box_zyx, roi_block_width)
            seg_box_s0 = seg_box * 2**scale
            seg_box_s5 = seg_box // roi_block_width

            timer_msg = f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})"
            with Timer(timer_msg, logger):
                roi_mask_s5, _ = fetch_roi(roi["server"],
                                           roi["uuid"],
                                           roi["name"],
                                           format='mask',
                                           mask_box=seg_box_s5)

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask(roi_mask_s5, seg_box, roi_block_width)

        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
                mask_extent = sbm.box[1] - sbm.box[0]
                if (mask_extent == 0).any():
                    raise RuntimeError("Could not find sparse masks for any of the subset-labels")
            except NotImplementedError:
                # This service can't produce sparse masks; fall back to no mask.
                sbm = None

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes

            # Apply halo WHILE downloading the data.
            # TODO: Allow the user to configure whether or not the halo should
            #       be fetched from the outset, or added after the blocks are loaded.
            halo = self.config["connectedcomponents"]["halo"]
            brickwall = BrickWall.from_volume_service(volume_service,
                                                      0,
                                                      None,
                                                      self.client,
                                                      target_partition_size_voxels,
                                                      halo,
                                                      sbm,
                                                      compression='lz4_2x')

        return brickwall
    def init_boxes(self, volume_service, subset_labels, roi):
        """
        Return the boxes to process, optionally restricted by an ROI mask or
        by the blocks occupied by a subset of labels.

        Args:
            volume_service:
                The input service.  Its ``bounding_box_zyx``,
                ``preferred_message_shape`` and (for ROIs) ``base_service``
                are read here.
            subset_labels:
                Labels used to build a sparse block mask (only consulted when
                ``roi`` is falsy).  May be empty.
            roi:
                Name of a DVID ROI, or a falsy value.

        Returns:
            Array of boxes.  If neither an ROI nor a usable label mask is
            available, the whole bounding box is tiled (clipped).

        Raises:
            RuntimeError: if the mask for ``subset_labels`` is empty along any axis.
        """
        # Needed by every branch below.  (Previously this was assigned only in
        # the ROI branch, so the subset_labels path failed with an unbound local.)
        brick_shape = volume_service.preferred_message_shape

        sbm = None
        if roi:
            base_service = volume_service.base_service
            assert isinstance(base_service, DvidVolumeService), \
                "Can't specify an ROI unless you're using a dvid input"

            assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
                "The 'roi' option doesn't support adapters other than 'rescale-level'"
            scale = 0
            if isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"

            # The ROI lives in the same repo as the segmentation instance.
            server, uuid, _seg_instance = base_service.instance_triple

            assert not (brick_shape % 2**(5-scale)).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = volume_service.bounding_box_zyx
            seg_box = round_box(seg_box, brick_shape)
            seg_box_s0 = seg_box * 2**scale
            seg_box_s5 = seg_box // 2**(5 - scale)

            with Timer(
                    f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})",
                    logger):
                roi_mask_s5, _ = fetch_roi(server,
                                           uuid,
                                           roi,
                                           format='mask',
                                           mask_box=seg_box_s5)

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask.create_from_highres_mask(
                roi_mask_s5, 2**(5 - scale), seg_box, brick_shape)
        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels(
                    [*subset_labels])
                if ((sbm.box[1] - sbm.box[0]) == 0).any():
                    raise RuntimeError(
                        "Could not find sparse masks for any of the subset-labels"
                    )
            except NotImplementedError:
                # This service can't produce sparse masks; fall back to the full grid.
                sbm = None

        if sbm is None:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    brick_shape,
                                    clipped=True)
            return np.array([*boxes])

        return sbm.sparse_boxes(brick_shape)
    def _get_sparse_block_mask(self, volume_service):
        """
        If the user's config specified a sparse subset of bodies to process,
        Return a SparseBlockMask object indicating where those bodies reside.
        
        If the user did not specify a 'subset-bodies' list, returns None, indicating
        that all segmentation blocks in the volume should be read.
        
        Also, if the input volume is not from a DvidVolumeService, return None.
        (In that case, the 'subset-bodies' feature can be used, but it isn't as efficient.)
        """
        import pandas as pd
        config = self.config_data

        sparse_body_ids = config["mesh-config"]["storage"]["subset-bodies"]
        if not sparse_body_ids:
            return None

        if not isinstance(volume_service.base_service, DvidVolumeService):
            # We only know how to retrieve sparse blocks for DVID volumes.
            # For other volume sources, we'll just have to fetch everything and filter
            # out the unwanted bodies at the mask aggregation step.
            return None

        grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
        assert grouping_scheme in ('no-groups', 'singletons', 'labelmap'), \
            f"Not allowed to use 'subset-bodies' setting for grouping scheme: {grouping_scheme}"

        if grouping_scheme in ('no-groups', 'singletons'):
            # The 'body ids' are identical to segment ids
            sparse_segment_ids = sparse_body_ids
        elif grouping_scheme == 'labelmap':
            # We need to convert the body ids into sparse segment ids
            mapping_pairs = self.load_labelmap()
            segments, bodies = mapping_pairs.transpose()

            # pandas.Series permits duplicate index values,
            # which is convenient for this reverse lookup
            reverse_lookup = pd.Series(index=bodies, data=segments)
            sparse_segment_ids = reverse_lookup.loc[sparse_body_ids].values

        # Fetch the sparse mask of blocks that the sparse segments belong to
        dvid_service = volume_service.base_service
        block_mask, lowres_box, block_shape = \
            sparkdvid.get_union_block_mask_for_bodies( dvid_service.server,
                                                       dvid_service.uuid,
                                                       dvid_service.instance_name,
                                                       sparse_segment_ids )

        fullres_box = lowres_box * block_shape
        return SparseBlockMask(block_mask, fullres_box, block_shape)
Beispiel #11
0
    def init_boxes(self, volume_service, roi):
        """
        Return the boxes to process, optionally restricted to a DVID ROI.

        Args:
            volume_service:
                The input service.  Its ``bounding_box_zyx`` and
                ``preferred_message_shape`` are read here.
            roi:
                dict with keys "name", "server", "uuid" and "relative-scale".
                If "name" is empty, the whole bounding box is tiled.
                One ROI voxel spans ``2**roi["relative-scale"]`` service voxels.

        Returns:
            Array of boxes, clipped to the service's true bounding box.
        """
        if not roi["name"]:
            # No ROI: tile the entire bounding box with the brick grid.
            grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                         volume_service.preferred_message_shape,
                                         clipped=True)
            return np.array([*grid_boxes])

        server = roi["server"]
        uuid = roi["uuid"]
        roi_name = roi["name"]

        # Width of one ROI voxel, measured in the service's voxels.
        roi_block_width = 2**roi["relative-scale"]

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % roi_block_width).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = round_box(volume_service.bounding_box_zyx, roi_block_width)
        seg_box_s5 = seg_box // roi_block_width

        timer_msg = f"Fetching mask for ROI '{roi_name}' ({seg_box[:, ::-1].tolist()})"
        with Timer(timer_msg, logger):
            roi_mask_s5, _ = fetch_roi(server,
                                       uuid,
                                       roi_name,
                                       format='mask',
                                       mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, roi_block_width)
        boxes = sbm.sparse_boxes(brick_shape)

        # Clip boxes to the true (not rounded) bounding box
        true_start, true_stop = volume_service.bounding_box_zyx
        boxes[:, 0] = np.maximum(boxes[:, 0], true_start)
        boxes[:, 1] = np.minimum(boxes[:, 1], true_stop)
        return boxes
    def init_brickwall(self, volume_service, subset_groups):
        """
        Create the BrickWall for the input volume, restricted (when the
        service supports it) to the bricks occupied by ``subset_groups``.

        As a side effect, the brick coordinates are written to
        'brick-coords.npy' in the current working directory.

        Args:
            volume_service:
                The input service.
            subset_groups:
                Passed to ``sparse_brick_coords_for_label_groups()``.

        Returns:
            The result of ``BrickWall.from_volume_service()``.
        """
        try:
            coords_df = volume_service.sparse_brick_coords_for_label_groups(subset_groups)
            np.save('brick-coords.npy', coords_df.to_records(index=False))

            # Convert voxel coordinates into brick indexes.
            brick_shape = volume_service.preferred_message_shape
            zyx_coords = coords_df[['z', 'y', 'x']].values
            sbm = SparseBlockMask.create_from_lowres_coords(zyx_coords // brick_shape,
                                                            brick_shape)
        except NotImplementedError:
            logger.warning(
                "The volume service does not support sparse fetching.  All bricks will be analyzed."
            )
            sbm = None

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes

            # Apply halo WHILE downloading the data.
            # TODO: Allow the user to configure whether or not the halo should
            #       be fetched from the outset, or added after the blocks are loaded.
            halo = self.config["findadjacencies"]["halo"]
            brickwall = BrickWall.from_volume_service(volume_service,
                                                      0,
                                                      None,
                                                      self.client,
                                                      target_partition_size_voxels,
                                                      halo,
                                                      sbm,
                                                      compression='lz4_2x')

        return brickwall
def test_copysegmentation_from_hdf5_to_dvid_custom_sbm(
        setup_hdf5_segmentation_input, disable_auto_retry):
    """
    Run the copy with a pickled SparseBlockMask supplied via the
    'sparse-block-mask' option, and check that everything outside the
    mask comes out as zero.
    """
    (template_dir, config, volume, dvid_address,
     repo_uuid, output_segmentation_name) = setup_hdf5_segmentation_input

    # Our bricks are long in Z, so use a mask that's aligned that way, too.
    full_mask = np.zeros(volume.shape, bool)
    full_mask[:, :, 64:128] = True
    full_mask[:, :, 192:256] = True

    # Sample the mask once per 64^3 block to get the low-res version.
    sbm = SparseBlockMask(full_mask[::64, ::64, ::64],
                          [(0, 0, 0), volume.shape],
                          (64, 64, 64))

    sbm_path = f"{template_dir}/sbm.pkl"
    with open(sbm_path, 'wb') as f:
        pickle.dump(sbm, f)
    config["copysegmentation"]["sparse-block-mask"] = sbm_path

    setup = (template_dir, config, volume, dvid_address, repo_uuid,
             output_segmentation_name)
    box_zyx, expected_vol, output_vol = _run_to_dvid(setup, check_scale_0=False)

    # Voxels outside the mask should not have been copied.
    masked_expected = expected_vol.copy()
    box_mask = full_mask[box_to_slicing(*box_zyx)]
    masked_expected[~box_mask] = 0
    assert (output_vol == masked_expected).all()
Beispiel #14
0
    def _process_slab(self, slab_index, output_slab_box):
        """
        (The main work of this file.)

        Process a large slab of voxels:

        1. Read a 'slab' of bricks from the input as a BrickWall
        2. Translate it to the output coordinates.
        3. Splice & group the bricks so that they are aligned to the optimal output grid
        4. 'Pad' the bricks on the edges of the wall by *reading* data from the output destination,
            so that all bricks are complete (i.e. they completely fill their grid block).
        5. Write all bricks to the output destination.
        6. Downsample the bricks and repeat steps 3-5 for the downsampled scale.

        Args:
            slab_index:
                Identifier of the slab, used only in log messages here.
            output_slab_box:
                The slab's box in output coordinates.  The input box is
                derived by subtracting ``self.translation_offset_zyx``.

        Returns:
            None.  Returns early if the slab contains no bricks to process.

        Raises:
            RuntimeError: any RuntimeError raised while creating the input
            BrickWall, other than the "SparseBlockMask selects no blocks" case.
        """
        options = self.config["copysegmentation"]
        pyramid_depth = options["pyramid-depth"]

        input_slab_box = output_slab_box - self.translation_offset_zyx
        if self.sbm is None:
            slab_sbm = None
        else:
            # Restrict the global mask to this slab.
            slab_sbm = SparseBlockMask.create_from_sbm_box(
                self.sbm, input_slab_box)

        try:
            input_wall = BrickWall.from_volume_service(
                self.input_service,
                0,
                input_slab_box,
                self.client,
                self.target_partition_size_voxels,
                sparse_block_mask=slab_sbm,
                compression=options['brick-compression'])

            if input_wall.num_bricks == 0:
                logger.info(
                    f"Slab: {slab_index}: No bricks to process.  Skipping.")
                return

        except RuntimeError as ex:
            if "SparseBlockMask selects no blocks" in str(ex):
                # An empty mask for this slab just means there's nothing to do.
                return
            # Any other RuntimeError is a genuine failure.  Previously it was
            # swallowed here, and the code below then failed on the unbound
            # 'input_wall' instead of reporting the real error.
            raise

        input_wall.persist_and_execute(
            f"Slab {slab_index}: Reading ({input_slab_box[:,::-1].tolist()})",
            logger)

        # Translate coordinates from input to output
        # (which will leave the bricks in a new, offset grid)
        # This has no effect on the brick volumes themselves.
        if any(self.translation_offset_zyx):
            input_wall = input_wall.translate(self.translation_offset_zyx)

        id_offset = options["add-offset-to-ids"]
        if id_offset != 0:

            def add_offset(brick):
                # Offset everything except for label 0, which remains 0
                vol = brick.volume.copy()
                brick.compress()
                vol[vol != 0] += id_offset
                return vol

            input_wall = input_wall.map_brick_volumes(add_offset)

        output_service = self.output_service

        # Pad internally to block-align to the OUTPUT alignment.
        # Here, we assume that any output labelmap (if any) is idempotent,
        # so it's okay to read pre-existing output data that will ultimately get remapped.
        padded_wall = self._consolidate_and_pad(slab_index, input_wall, 0,
                                                output_service)

        # Write scale 0 to DVID
        if not options["skip-scale-0-write"]:
            self._write_bricks(slab_index, padded_wall, 0, output_service)

        if options["compute-block-statistics"]:
            with Timer(f"Slab {slab_index}: Computing slab block statistics",
                       logger):
                # The option is either True (use the output's native block
                # width) or an explicit block shape.
                if options["compute-block-statistics"] is True:
                    block_shape = 3 * [
                        self.output_service.base_service.block_width
                    ]
                else:
                    block_shape = options["compute-block-statistics"]

                def block_stats_for_brick(brick):
                    vol = brick.volume
                    brick.compress()
                    return block_stats_for_volume(block_shape, vol,
                                                  brick.physical_box)

                slab_block_stats_per_brick = padded_wall.bricks.map(
                    block_stats_for_brick).compute()
                slab_block_stats_df = pd.concat(slab_block_stats_per_brick,
                                                ignore_index=True)
                del slab_block_stats_per_brick

            with Timer(
                    f"Slab {slab_index}: Appending stats and overwriting stats file"
            ):
                self._append_slab_statistics(slab_block_stats_df)

        for new_scale in range(1, 1 + pyramid_depth):
            use_pre_downsampled = (
                options["download-pre-downsampled"]
                and new_scale in self.input_service.available_scales)

            if use_pre_downsampled:
                # The previous scale isn't needed; fetch this scale directly from the input.
                del padded_wall
                downsampled_wall = BrickWall.from_volume_service(
                    self.input_service,
                    new_scale,
                    input_slab_box,
                    self.client,
                    self.target_partition_size_voxels,
                    compression=options["brick-compression"])
                downsampled_wall.persist_and_execute(
                    f"Slab {slab_index}: Scale {new_scale}: Downloading pre-downsampled bricks",
                    logger)
            else:
                # Compute downsampled (results in smaller bricks)
                downsampled_wall = padded_wall.downsample(
                    (2, 2, 2), method=options["downsample-method"])
                downsampled_wall.persist_and_execute(
                    f"Slab {slab_index}: Scale {new_scale}: Downsampling",
                    logger)
                del padded_wall

            # Consolidate to full-size bricks and pad internally to block-align
            consolidated_wall = self._consolidate_and_pad(
                slab_index, downsampled_wall, new_scale, output_service)
            del downsampled_wall

            # Write to DVID
            self._write_bricks(slab_index, consolidated_wall, new_scale,
                               output_service)

            # This scale becomes the source for the next one.
            padded_wall = consolidated_wall
            del consolidated_wall
        del padded_wall
Beispiel #15
0
    def _init_masks(self):
        """
        Initialize the mask-related members from the "copysegmentation" config:

        - ``self.sbm``: the SparseBlockMask used to limit which blocks are
          read, or None.  It comes from the pickled 'sparse-block-mask' file
          if one is configured; otherwise from the input and/or output mask
          labels (their intersection, if both are available).
        - ``self.output_mask_labels``: set of labels from 'output-mask-labels'.
        - ``self.input_mask_labels``: set of labels from 'input-mask-labels',
          shifted by 'add-offset-to-ids' if that is nonzero.
        """
        options = self.config["copysegmentation"]
        self.sbm = None

        if options["sparse-block-mask"]:
            # In theory, we could just take the intersection of the masks involved.
            # But I'm too lazy to think about that right now.
            assert not options["input-mask-labels"] and not options["output-mask-labels"], \
                "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

            # NOTE(review): pickle.load() can execute arbitrary code.
            # Only point this option at files from a trusted source.
            with open(options["sparse-block-mask"], 'rb') as f:
                self.sbm = pickle.load(f)

        is_supervoxels = False
        if isinstance(self.input_service.base_service, DvidVolumeService):
            is_supervoxels = self.input_service.base_service.supervoxels

        output_mask_labels = load_body_list(options["output-mask-labels"],
                                            is_supervoxels)
        self.output_mask_labels = set(output_mask_labels)

        output_sbm = None
        if len(output_mask_labels) > 0:
            # The output mask can only be used to limit *input* reads if the
            # two services share the same brick shape and bounding box.
            if (self.output_service.preferred_message_shape !=
                    self.input_service.preferred_message_shape).any():
                logger.warning(
                    "Not using output mask to reduce data fetching: Your input service and output service don't have the same brick shape"
                )
            elif (self.output_service.bounding_box_zyx !=
                  self.input_service.bounding_box_zyx).any():
                logger.warning(
                    "Not using output mask to reduce data fetching: Your input service and output service don't have the same bounding box"
                )
            else:
                try:
                    output_sbm = self.output_service.sparse_block_mask_for_labels(
                        output_mask_labels)
                except NotImplementedError:
                    output_sbm = None

        input_mask_labels = load_body_list(options["input-mask-labels"],
                                           is_supervoxels)

        input_sbm = None
        if len(input_mask_labels) > 0:
            try:
                input_sbm = self.input_service.sparse_block_mask_for_labels(
                    input_mask_labels)
            except NotImplementedError:
                input_sbm = None

        if self.sbm is not None:
            # An explicit sparse-block-mask file takes precedence.
            pass
        elif input_sbm is None:
            self.sbm = output_sbm
        elif output_sbm is None:
            self.sbm = input_sbm
        else:
            assert (input_sbm.resolution == output_sbm.resolution).all(), \
                "FIXME: At the moment, you can't supply both an input mask and an output "\
                "mask unless the input and output sources use the same brick shape (message-block-shape)"

            final_box = box_intersection(input_sbm.box, output_sbm.box)

            # Express the intersection relative to each mask's own origin, in
            # low-res (block) units, so it can be used to index lowres_mask.
            # NOTE(review): this assumes extract_subvol() takes a box relative
            # to the array it slices -- confirm against its definition.
            input_box = (final_box - input_sbm.box[0]) // input_sbm.resolution
            input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

            output_box = (final_box - output_sbm.box[0]) // output_sbm.resolution
            output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

            assert input_mask.shape == output_mask.shape
            # (np.bool was removed in NumPy 1.24; the builtin compares equal to the bool dtype.)
            assert input_mask.dtype == output_mask.dtype == bool
            final_mask = (input_mask & output_mask)

            self.sbm = SparseBlockMask(final_mask, final_box,
                                       input_sbm.resolution)

        id_offset = options["add-offset-to-ids"]
        if id_offset != 0:
            # Input labels are compared after the offset is applied to the
            # data, so shift the mask labels by the same amount.
            input_mask_labels = np.asarray(input_mask_labels, np.uint64)
            input_mask_labels += id_offset
        self.input_mask_labels = set(input_mask_labels)
Beispiel #16
0
    def init_boxes(self, volume_service, subset_labels, roi):
        """
        Determine the brick boxes to process for the given volume service.

        Args:
            volume_service:
                The service whose bounding_box_zyx and preferred_message_shape
                define the brick grid.
            subset_labels:
                If non-empty (and no roi is given), only bricks covered by the
                service's sparse block mask for these labels are returned,
                provided the service supports sparse_block_mask_for_labels().
            roi:
                If given, the name of a DVID ROI.  Only bricks which intersect
                the ROI are returned.  Requires a DVID-based service,
                optionally wrapped in a ScaledVolumeService.

        Returns:
            ndarray of boxes, indexed as boxes[i, 0, :] (start) and
            boxes[i, 1, :] (stop), clipped to the service's bounding box.

        Raises:
            RuntimeError: If the sparse mask for subset_labels is empty.
        """
        # Needed by all code paths below (not just the ROI case).
        brick_shape = volume_service.preferred_message_shape

        sbm = None
        if roi:
            base_service = volume_service.base_service
            assert isinstance(base_service, DvidVolumeService), \
                "Can't specify an ROI unless you're using a dvid input"

            assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
                "The 'roi' option doesn't support adapters other than 'rescale-level'"
            scale = 0
            if isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"

            # Number of service-scale voxels spanned by one scale-5 (ROI) voxel.
            s5_factor = 2**(5 - scale)

            server, uuid, _seg_instance = base_service.instance_triple

            assert not (brick_shape % s5_factor).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = volume_service.bounding_box_zyx
            seg_box = round_box(seg_box, brick_shape)
            seg_box_s5 = seg_box // s5_factor

            with Timer(f"Fetching mask for ROI '{roi}'", logger):
                roi_mask_s5, roi_box_s5 = fetch_roi(server,
                                                    uuid,
                                                    roi,
                                                    format='mask')

            # Restrict to input bounding box
            clipped_roi_box_s5 = box_intersection(seg_box_s5, roi_box_s5)
            clipped_roi_mask_s5 = extract_subvol(
                roi_mask_s5, clipped_roi_box_s5 - roi_box_s5[0])

            # Align to brick grid.
            # The brick shape is given in service-scale voxels, so it must be
            # converted to scale-5 units using the same factor as seg_box above.
            aligned_roi_box_s5 = round_box(clipped_roi_box_s5,
                                           brick_shape // s5_factor, 'out')

            # Row 0 holds (aligned start - clipped start), which is <= 0.
            # Negate it so both rows are non-negative pad widths.
            padding = (aligned_roi_box_s5 - clipped_roi_box_s5)
            padding[0] *= -1
            aligned_roi_mask_s5 = np.pad(clipped_roi_mask_s5,
                                         padding.transpose())

            # At the service native scale
            aligned_roi_box = (s5_factor * aligned_roi_box_s5)
            logger.info(
                f"Brick-aligned ROI '{roi}' has bounding-box {aligned_roi_box[:, ::-1].tolist()}"
            )

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask.create_from_highres_mask(
                aligned_roi_mask_s5, s5_factor, aligned_roi_box,
                brick_shape)
        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels(
                    [*subset_labels])
                if ((sbm.box[1] - sbm.box[0]) == 0).any():
                    raise RuntimeError(
                        "Could not find sparse masks for any of the subset-labels"
                    )
            except NotImplementedError:
                # This service can't provide a sparse mask; use the dense grid.
                sbm = None

        if sbm is None:
            # No mask: Use every brick in the (clipped) grid.
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    brick_shape,
                                    clipped=True)
            return np.array([*boxes])

        boxes = np.array(sbm.sparse_boxes(brick_shape))

        # Clip
        boxes[:, 0, :] = np.maximum(volume_service.bounding_box_zyx[0],
                                    boxes[:, 0, :])
        boxes[:, 1, :] = np.minimum(volume_service.bounding_box_zyx[1],
                                    boxes[:, 1, :])
        assert (boxes[:,0,:] < boxes[:,1,:]).all(), \
            "After cropping to input volume, some bricks disappeared."

        return boxes
Beispiel #17
0
def main():
    """
    Command-line entry point.

    For each sparsevol file given on the command line, a new label is chosen
    (one greater than the previous, starting after the repo-wide max label)
    and the file is passed to overwrite_sparsevol(), optionally along with
    an ROI mask.  The new label and the labels reported by
    overwrite_sparsevol() are written to a JSON file next to each input file.
    """
    configure_default_logging()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--no-downres', action='store_true')
    parser.add_argument('--only-within-roi')
    parser.add_argument('--not-within-roi')
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('sparsevol_files', nargs='+')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    # Report bad usage via argparse rather than assert,
    # since asserts are skipped when python runs with -O.
    if args.only_within_roi and args.not_within_roi:
        parser.error(
            "Can't supply both --only-within-roi and --not-within-roi.  Pick one or the other (or neither)."
        )

    roi = args.only_within_roi or args.not_within_roi
    invert_roi = (args.not_within_roi is not None)

    if roi:
        roi_mask, mask_box = fetch_roi(args.dvid_server,
                                       args.uuid,
                                       roi,
                                       format='mask')
        roi_sbm = SparseBlockMask(roi_mask, mask_box * (2**5),
                                  2**5)  # ROIs are provided at scale 5
    else:
        roi_sbm = None

    # Ideally, we would choose the max label for the node we're writing to,
    # but the /maxlabel endpoint doesn't work for all nodes
    # instead, we'll use the repo-wide maxlabel from the /info JSON.
    #maxlabel = fetch_maxlabel(args.dvid_server, args.uuid, args.labelmap_instance)
    maxlabel = fetch_instance_info(
        args.dvid_server, args.uuid,
        args.labelmap_instance)["Extended"]["MaxRepoLabel"]

    for i, path in enumerate(args.sparsevol_files):
        # Each file gets its own, previously unused label.
        maxlabel += 1
        name = os.path.basename(path)
        prefix_logger = PrefixedLogger(logger, f"Vol #{i:02d} {name}: ")

        with Timer(f"Pasting {name} as {maxlabel}", logger):
            overwritten_labels = overwrite_sparsevol(*instance_info, maxlabel,
                                                     path, roi_sbm, invert_roi,
                                                     args.no_downres,
                                                     prefix_logger)

        # Results are written alongside the input, with a .json extension.
        results_path = os.path.splitext(path)[0] + '.json'
        with open(results_path, 'w') as f:
            results = {
                'new-label': maxlabel,
                'overwritten_labels': sorted(overwritten_labels)
            }
            json.dump(results, f, indent=2, cls=NumpyConvertingEncoder)

    logger.info("Done.")
Beispiel #18
0
    def execute(self):
        """
        Sample the volume at each point listed in the input CSV table,
        and export the table with the sampled values appended as a new column.

        The points (columns 'z', 'y', 'x' of the "input-table") are optionally
        downscaled according to "rescale-points-to-level", grouped by the brick
        they fall in, and joined with a sparse BrickWall containing only those
        bricks.  The sampled values are stored in the "output-column" and the
        table is written to "output-table", sorted by (z, y, x).

        Raises:
            RuntimeError: If any point lies outside the volume's bounding box.
        """
        self._sanitize_config()

        input_config = self.config["input"]
        options = self.config["samplepoints"]
        resource_config = self.config["resource-manager"]

        resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"])
        volume_service = VolumeService.create_from_config(input_config, resource_mgr_client)

        input_csv = options["input-table"]
        with Timer(f"Reading {input_csv}", logger):
            coordinate_table_df = pd.read_csv(input_csv, header=0, dtype=CSV_TYPES)
            points = coordinate_table_df[['z', 'y', 'x']].values

        rescale = options["rescale-points-to-level"]
        if rescale != 0:
            # Not in-place: The array returned by .values might share memory
            # with the dataframe (or be read-only), and the dataframe's
            # scale-0 coordinates are still needed below.
            points = points // (2**rescale)

        # All points must lie within the input volume
        # (i.e. clipping the points' bounding box to the volume must not change it).
        points_box = [points.min(axis=0), 1+points.max(axis=0)]
        if (box_intersection(points_box, volume_service.bounding_box_zyx) != points_box).any():
            raise RuntimeError("The point list includes points outside of the volume bounding box.")

        with Timer("Sorting points by Brick ID", logger):
            # 'Brick ID' is defined as the divided corner coordinate 
            brick_shape = volume_service.preferred_message_shape
            brick_ids_and_points = np.concatenate( (points // brick_shape, points), axis=1 )
            brick_ids_and_points = lexsort_columns(brick_ids_and_points)

            brick_ids = brick_ids_and_points[: ,:3]
            points = brick_ids_and_points[:, 3:]
            
            # Extract the first row of each group to get the set of unique brick IDs
            point_group_spans = groupby_spans_presorted(brick_ids)
            point_group_starts = (start for start, stop in point_group_spans)
            unique_brick_ids = brick_ids[np.fromiter(point_group_starts, np.int32)]

        with Timer("Constructing sparse mask", logger):
            # BrickWall.from_volume_service() supports the ability to initialize a sparse RDD,
            # with only a subset of Bricks (rather than a dense RDD containing every brick
            # within the volume bounding box).
            # It requires a SparseBlockMask object indicating exactly which Bricks need to be fetched.
            brick_mask_box = np.array([unique_brick_ids.min(axis=0), 1+unique_brick_ids.max(axis=0)])

            brick_mask_shape = (brick_mask_box[1] - brick_mask_box[0])
            brick_mask = np.zeros(brick_mask_shape, bool)
            brick_mask_coords = unique_brick_ids - brick_mask_box[0]
            brick_mask[tuple(brick_mask_coords.transpose())] = True
            sbm = SparseBlockMask(brick_mask, brick_mask_box*brick_shape, brick_shape)

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes
            brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client, target_partition_size_voxels, 0, sbm, lazy=True)
        
        with Timer(f"Grouping {len(points)} points", logger):
            # This is faster than pandas.DataFrame.groupby() for large data
            point_groups = groupby_presorted(points, brick_ids)
            id_and_ptgroups = list(zip(unique_brick_ids, point_groups))
            num_groups = len(id_and_ptgroups)

        with Timer(f"Join {num_groups} point groups with bricks", logger):
            id_and_ptgroups = dask.bag.from_sequence( id_and_ptgroups,
                                                      npartitions=brickwall.bricks.npartitions )

            id_and_ptgroups = id_and_ptgroups.map(lambda i_p: (*i_p[0], i_p[1]))
            id_and_ptgroups_df = id_and_ptgroups.to_dataframe(columns=['z', 'y', 'x', 'pointgroup'])
            
            ids_and_bricks = brickwall.bricks.map(lambda brick: (*(brick.logical_box[0] // brick_shape), brick))
            ids_and_bricks_df = ids_and_bricks.to_dataframe(columns=['z', 'y', 'x', 'brick'])

            def set_brick_id_index(df):
                """
                Encode each row's (z,y,x) brick ID into a single uint64
                'brick_id' column and make that column the index.
                """
                def set_brick_id(df):
                    df['brick_id'] = encode_coords_to_uint64( df[['z', 'y', 'x']].values.astype(np.int32) )
                    return df
                df['brick_id'] = np.uint64(0)
                df = df.map_partitions(set_brick_id, meta=df)

                # Note: bricks and pointgroups are already sorted by
                # brick scan-order so, brick_id is already sorted.
                # Specifying sorted=True is critical to performance here.
                df = df.set_index('brick_id', sorted=True)
                return df

            # Give them matching indexes
            ids_and_bricks_df = set_brick_id_index(ids_and_bricks_df)
            id_and_ptgroups_df = set_brick_id_index(id_and_ptgroups_df)

            # Join (index-on-index, so it should be fast)
            ptgroup_and_brick_df = id_and_ptgroups_df.merge( ids_and_bricks_df,
                                                             how='left', left_index=True, right_index=True )
            ptgroup_and_brick_df = ptgroup_and_brick_df[['pointgroup', 'brick']]
            ptgroup_and_brick = ptgroup_and_brick_df.to_bag()
            
        # Persist and force computation before proceeding.
        #ptgroup_and_brick = persist_and_execute(ptgroup_and_brick, "Persisting joined point groups", logger, False)
        #assert ptgroup_and_brick.count().compute() == num_groups == brickwall.num_bricks

        def sample_points(points_and_brick):
            """
            Given a Brick and array of points (N,3) that lie within it,
            sample labels from the points within the brick and return
            a record array containing the points and the sampled labels.
            """
            points, brick = points_and_brick

            result_dtype = [('z', np.int32), ('y', np.int32), ('x', np.int32), ('label', np.uint64)]
            result = np.zeros((len(points),), result_dtype)
            result['z'] = points[:,0]
            result['y'] = points[:,1]
            result['x'] = points[:,2]

            # Make relative to brick offset
            # (without modifying the input array in-place)
            offsets = points - brick.physical_box[0]
            
            result['label'] = brick.volume[tuple(offsets.transpose())]
            return result

        with Timer("Sampling bricks", logger):
            brick_samples = ptgroup_and_brick.map(sample_points).compute()

        with Timer("Concatenating samples", logger):
            sample_table = np.concatenate(brick_samples)

        with Timer("Sorting samples", logger):
            # This will sort in terms of the SCALED z,y,x coordinates
            sample_table.sort()

        with Timer("Sorting table", logger):
            if rescale == 0:
                coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)
            else:
                # sample_table is sorted by RESCALED coordinate,
                # so sort our table the same way
                coordinate_table_df['rz'] = coordinate_table_df['z'] // (2**rescale)
                coordinate_table_df['ry'] = coordinate_table_df['y'] // (2**rescale)
                coordinate_table_df['rx'] = coordinate_table_df['x'] // (2**rescale)
                coordinate_table_df.sort_values(['rz', 'ry', 'rx'], inplace=True)
                del coordinate_table_df['rz']
                del coordinate_table_df['ry']
                del coordinate_table_df['rx']
                
        # Now that samples and input rows are sorted identically,
        # append the results
        output_col = options["output-column"]
        coordinate_table_df[output_col] = sample_table['label'].copy()

        if rescale != 0:
            with Timer("Re-sorting table at scale 0", logger):
                # For simplicity (API and testing), we guarantee that coordinates are sorted in the output.
                # In the case of rescaled points, they need to be sorted once more (at scale 0 this time)
                coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)

        with Timer("Exporting samples", logger):
            coordinate_table_df.to_csv(options["output-table"], header=True, index=False)

        logger.info("DONE.")