def test_copysegmentation_from_dvid_to_dvid_input_mask(
        setup_dvid_segmentation_input, disable_auto_retry):
    template_dir, config, volume, dvid_address, repo_uuid, _output_segmentation_name = setup_dvid_segmentation_input

    # make sure we get a fresh output
    output_segmentation_name = 'copyseg-with-input-mask-from-dvid'
    config["output"]["dvid"]["segmentation-name"] = output_segmentation_name

    # Add an offset, which is added to both the input volume AND the mask labels
    offset = 2000
    config["copysegmentation"]["add-offset-to-ids"] = offset

    # Select some labels that don't extend throughout the whole volume
    selected_labels = pd.unique(volume[150, 64:128, 64:128].reshape(-1))
    assert 0 not in selected_labels
    selected_coords = np.array(
        mask_for_labels(volume, selected_labels).nonzero()).transpose()
    selected_box = np.array(
        [selected_coords.min(axis=0), 1 + selected_coords.max(axis=0)])

    input_box = np.array(config["input"]["geometry"]["bounding-box"])[:, ::-1]

    subvol_box = box_intersection(input_box, selected_box)
    selected_subvol = extract_subvol(volume, subvol_box).copy()
    selected_subvol = apply_mask_for_labels(selected_subvol, selected_labels)
    config["copysegmentation"]["input-mask-labels"] = selected_labels.tolist()

    selected_subvol = np.where(selected_subvol, selected_subvol + offset, 0)
    expected_vol = np.zeros(volume.shape, np.uint64)
    overwrite_subvol(expected_vol, subvol_box, selected_subvol)

    setup = template_dir, config, expected_vol, dvid_address, repo_uuid, output_segmentation_name
    _box_zyx, _expected_vol, _output_vol = _run_to_dvid(setup)
Example #2
# Note: 'grid' and 'bounding_box' are captured from the enclosing function's scope.
def logical_and_clipped(box):
    midpoint = (box[0] + box[1]) // 2
    logical_box = grid.compute_logical_box(midpoint)
    box += (-grid.halo_shape, grid.halo_shape)
    # Note: Non-intersecting boxes will have non-positive shape after clipping
    clipped_box = box_intersection(box, bounding_box)
    return (logical_box, clipped_box)
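
All of these snippets share one convention: a box is a (2, D) integer array of [start, stop) corners. The real box_intersection lives in the project's box utilities; the following is only a minimal numpy sketch of the behavior the examples rely on.

import numpy as np

def box_intersection_sketch(box_a, box_b):
    """
    Intersect two [start, stop) boxes given as (2, D) arrays.
    Disjoint boxes yield a result with non-positive shape, which
    callers detect via ((box[1] - box[0]) <= 0).any().
    """
    box_a, box_b = np.asarray(box_a), np.asarray(box_b)
    result = np.empty_like(box_a)
    result[0] = np.maximum(box_a[0], box_b[0])  # latest start wins
    result[1] = np.minimum(box_a[1], box_b[1])  # earliest stop wins
    return result

assert (box_intersection_sketch([(0, 0), (10, 10)],
                                [(5, 5), (20, 20)]) == [(5, 5), (10, 10)]).all()
empty = box_intersection_sketch([(0, 0), (5, 5)], [(8, 8), (9, 9)])
assert ((empty[1] - empty[0]) <= 0).any()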
Example #3
    def get_subvolume(self, box_zyx, scale=0):
        box_zyx = np.array(box_zyx)
        orig_box = box_zyx.copy()
        box_zyx -= (self._global_offset // (2**scale))

        clipped_box = box_intersection(box_zyx, [(0,0,0), self.zarr_dataset(scale).shape])
        if (clipped_box == box_zyx).all():
            return self.zarr_dataset(scale)[box_to_slicing(*box_zyx.tolist())]

        # Note that this message shows the true zarr storage bounds,
        # and doesn't show the logical bounds according to global_offset (if any).
        msg = f"Zarr Request is out-of-bounds (XYZ): {orig_box[:, ::-1].tolist()}"
        if self._out_of_bounds_access in ("permit", "permit-empty"):
            logger.warning(msg)
        else:
            msg += "\nAdd 'out-of-bounds-access' to your config to allow such requests"
            raise RuntimeError(msg)

        if (clipped_box[1] - clipped_box[0] <= 0).any():
            # request is completely out-of-bounds; just return zeros
            return np.zeros(box_zyx[1] - box_zyx[0], self.dtype)

        # Request is partially out-of-bounds; read what we can, zero-fill for the rest.
        clipped_vol = self.zarr_dataset(scale)[box_to_slicing(*clipped_box.tolist())]
        result = np.zeros(box_zyx[1] - box_zyx[0], self.dtype)
        localbox = clipped_box - box_zyx[0]
        result[box_to_slicing(*localbox)] = clipped_vol
        return result
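
The method above clips the requested box to the storage bounds and zero-fills the remainder. Below is a self-contained sketch of that read idiom, with a stand-in for the box_to_slicing helper (assumed to turn a (2, D) box into a tuple of slices).

import numpy as np

def box_to_slicing(start, stop):
    # Assumed equivalent of the helper used above.
    return tuple(slice(a, b) for a, b in zip(start, stop))

def read_with_zero_fill(storage, box):
    """Read 'box' from 'storage', zero-filling any out-of-bounds portion."""
    box = np.asarray(box)
    clipped = np.array([np.maximum(box[0], 0),
                        np.minimum(box[1], storage.shape)])
    result = np.zeros(box[1] - box[0], storage.dtype)
    if ((clipped[1] - clipped[0]) > 0).all():
        result[box_to_slicing(*(clipped - box[0]))] = storage[box_to_slicing(*clipped)]
    return result

storage = np.arange(16).reshape(4, 4)
vol = read_with_zero_fill(storage, [(-1, -1), (2, 2)])
assert vol.shape == (3, 3)
assert (vol[0] == 0).all() and vol[2, 2] == storage[1, 1]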
Example #4
    def _extract_subbrick(brick, box):
        """
        Given a brick and the box to extract from it,
        return a new Brick with the same logical_box as the original brick,
        but only containing the subvolume corresponding to the given box.

        If necessary, the returned subbrick will be zero-padded to fill
        the entirety of the given box.
        """
        box_clipped = box_intersection(box, brick.physical_box)
        if (box_clipped[1] - box_clipped[0] <= 0).any():
            return None

        subvol = extract_subvol(brick.volume,
                                box_clipped - brick.physical_box[0])
        full_subvol = zero_fill(subvol, box_clipped, box)

        # FIXME: Should we bother with location_id?
        #        (If we don't, realign operations won't work,
        #        but it's not clear what that would mean for halos anyway)
        subbrick = Brick(brick.logical_box,
                         box,
                         full_subvol,
                         compression=brick.compression)
        return subbrick
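
The zero_fill helper isn't shown on this page. From the call site above, it evidently embeds a subvolume (covering box_clipped) into a zero array covering the full requested box; the sketch below is that inferred behavior, not the project's actual implementation.

import numpy as np

def zero_fill_sketch(vol, vol_box, full_box):
    """Embed 'vol' (occupying 'vol_box') into zeros covering 'full_box'."""
    vol_box, full_box = np.asarray(vol_box), np.asarray(full_box)
    result = np.zeros(full_box[1] - full_box[0], vol.dtype)
    local_box = vol_box - full_box[0]
    result[tuple(slice(a, b) for a, b in zip(*local_box))] = vol
    return result

vol = np.ones((2, 2), np.uint8)
padded = zero_fill_sketch(vol, [(1, 1), (3, 3)], [(0, 0), (4, 4)])
assert padded.sum() == 4 and padded[1, 1] == 1 and padded[0, 0] == 0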
Example #5
def test_generate_bricks():
    grid = Grid( (10,20), (12,3) )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    bricks = bricks.compute()
    assert len(bricks) == 9 * 14 == num_bricks
    
    for brick in bricks:
        assert isinstance( brick, Brick )
        assert brick.logical_box.shape == (2,2)
        assert brick.physical_box.shape == (2,2)

        # logical_box must be exactly one block
        assert ((brick.logical_box[1] - brick.logical_box[0]) == grid.block_shape).all()
        
        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()
        
        # Must not exceed bounding box
        assert (brick.physical_box == box_intersection( brick.logical_box, bounding_box )).all()
        
        # Volume shape must match
        assert (brick.volume.shape == brick.physical_box[1] - brick.physical_box[0]).all()
        
        # Volume data must match
        assert (brick.volume == extract_subvol( volume, brick.physical_box )).all()

        # __sizeof__ must include the volume
        assert sys.getsizeof(brick) > sys.getsizeof(brick.volume)
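
The grid-alignment assertions above reduce to simple arithmetic: a point's logical box is its grid cell, shifted by the grid offset. A sketch of what compute_logical_box presumably computes for the Grid((10,20), (12,3)) used in this test:

import numpy as np

def compute_logical_box_sketch(point, block_shape, offset):
    """Return the grid-aligned [start, stop) box containing 'point'."""
    point = np.asarray(point)
    start = offset + ((point - offset) // block_shape) * block_shape
    return np.array([start, start + block_shape])

box = compute_logical_box_sketch((15, 30), np.array((10, 20)), np.array((12, 3)))
assert (box == [(12, 23), (22, 43)]).all()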
Example #6
def test_realign_bricks_to_new_grid_WITH_HALO():
    grid = Grid( (10,20), (12,3) )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    original_bricks, _num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    halo = 1
    halo_shape = np.array([1,1])
    new_grid = Grid((20,10), (0,0), halo)
    new_bricks = realign_bricks_to_new_grid(new_grid, original_bricks).compute()

    new_logical_boxes = list(brick.logical_box for brick in new_bricks)

    assert len(new_bricks) == 5 * 26, f"{len(new_bricks)}" # from (0,30) -> (100,290)
    
    for logical_box, brick in zip(new_logical_boxes, new_bricks):
        assert isinstance( brick, Brick ), f"Got {type(brick)}"
        assert (brick.logical_box == logical_box).all()

        # logical_box must be exactly one block
        assert ((brick.logical_box[1] - brick.logical_box[0]) == new_grid.block_shape).all()
        
        # Must be grid-aligned
        assert ((brick.logical_box - new_grid.offset) % new_grid.block_shape == 0).all()
        
        # Should match logical_box+halo, except for edges
        assert (brick.physical_box == box_intersection( brick.logical_box + (-halo_shape, halo_shape), bounding_box )).all()
        
        # Volume shape must match
        assert (brick.volume.shape == brick.physical_box[1] - brick.physical_box[0]).all()
        
        # Volume data must match
        assert (brick.volume == extract_subvol( volume, brick.physical_box )).all()
Example #8
def test_generate_bricks_WITH_HALO():
    halo = 1
    halo_shape = np.array([1,1])
    grid = Grid( (10,20), (12,3), halo )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )
    bricks = bricks.compute()

    assert len(bricks) == 9 * 14 == num_bricks
    
    for brick in bricks:
        assert isinstance( brick, Brick )
        assert brick.logical_box.shape == (2,2)
        assert brick.physical_box.shape == (2,2)

        # logical_box must be exactly one block
        assert ((brick.logical_box[1] - brick.logical_box[0]) == grid.block_shape).all()
        
        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()
        
        # Physical == logical+halo, except for bounding-box edges
        assert (brick.physical_box == box_intersection( brick.logical_box + (-halo_shape, halo_shape), bounding_box )).all()
        
        # Volume shape must match
        assert (brick.volume.shape == brick.physical_box[1] - brick.physical_box[0]).all()
        
        # Volume data must match
        assert (brick.volume == extract_subvol( volume, brick.physical_box )).all()
Example #9
def assemble_brick_fragments( fragments ):
    """
    Given a list of Bricks with identical logical_boxes, splice their volumes
    together into a final Brick that contains a full volume containing all of
    the fragments.
    
    Note: Brick 'fragments' are also just Bricks, whose physical_box does
          not cover the entire logical_box for the brick.
    
    Each fragment's physical_box indicates where that fragment's data
    should be located within the final returned Brick.
    
    Returns: A Brick containing the data from all fragments,
            UNLESS the fully assembled fragments would not intersect
            with the Brick's own logical_box (i.e. all fragments fall
            within the halo), in which case None is returned.
    
    Note: If the fragment physical_boxes are not disjoint, the results
          are undefined.
    """
    fragments = list(fragments)

    # All logical boxes must be the same
    logical_boxes = np.asarray([frag.logical_box for frag in fragments])
    assert (logical_boxes == logical_boxes[0]).all(), \
        "Cannot assemble brick fragments from different logical boxes. "\
        "They belong to different bricks!"
    final_logical_box = logical_boxes[0]

    # The final physical box is the min/max of all fragment physical extents.
    physical_boxes = np.array([frag.physical_box for frag in fragments])
    assert physical_boxes.ndim == 3 # (N, 2, Dim)
    assert physical_boxes.shape == ( len(fragments), 2, final_logical_box.shape[1] )
    
    final_physical_box = np.asarray( ( np.min( physical_boxes[:,0,:], axis=0 ),
                                       np.max( physical_boxes[:,1,:], axis=0 ) ) )

    interior_box = box_intersection(final_physical_box, final_logical_box)
    if (interior_box[1] - interior_box[0] < 1).any():
        # All fragments lie completely within the halo
        return None

    final_volume_shape = final_physical_box[1] - final_physical_box[0]
    dtype = fragments[0].volume.dtype

    final_volume = np.zeros(final_volume_shape, dtype)

    for frag in fragments:
        internal_box = frag.physical_box - final_physical_box[0]
        overwrite_subvol(final_volume, internal_box, frag.volume)

        # Destroy original to save RAM
        frag.destroy()

    brick = Brick( final_logical_box, final_physical_box, final_volume )
    brick.compress()
    return brick
Example #10
def clip_to_logical( brick ):
    """
    Truncate the given brick so that its volume does not exceed the bounds of its logical_box.
    (Useful if the brick was originally constructed with a halo.)
    """
    intersection = box_intersection(brick.physical_box, brick.logical_box)
    assert (intersection[1] > intersection[0]).all(), \
        f"physical_box ({brick.physical_box}) does not intersect logical_box ({brick.logical_box})"
    
    intersection_within_physical = intersection - brick.physical_box[0]
    new_vol = brick.volume[ box_to_slicing(*intersection_within_physical) ]
    return Brick( brick.logical_box, intersection, new_vol )
Example #11
def split_brick(new_grid, original_brick):
    """
    Given a single brick and a new grid to which its data should be redistributed,
    split the brick into pieces, indexed by their NEW grid locations.
    
    The brick fragments are returned as Bricks themselves, but with relatively
    small volume and physical_box members.
    
    Note: It is probably a mistake to call this function for Bricks which have
          a larger physical_box than logical_box, so that is currently forbidden.
          (It would work here, but it implies that you will end up with some voxels
          represented multiple times in a given RDD of Bricks, with undefined results
          as to which ones are kept after you consolidate them into a new alignment.
          
          However, the reverse is permitted, i.e. it is permitted for the DESTINATION
          grid to use a halo, in which case some pixels in the original brick will be
          duplicated to multiple destinations.)
    
    Returns: [(box,Brick), (box, Brick), ....],
            where each Brick is a fragment (to be assembled later into the new grid's bricks),
            and 'box' is the logical_box of the Brick into which this fragment should be assembled.
    """
    new_logical_boxes_and_fragments = []
    
    # Forbid out-of-bounds physical_boxes. (See note above.)
    assert ((original_brick.physical_box[0] >= original_brick.logical_box[0]).all() and
            (original_brick.physical_box[1] <= original_brick.logical_box[1]).all())
    
    # Iterate over the new boxes that intersect with the original brick
    for destination_box in boxes_from_grid(original_brick.physical_box, new_grid, include_halos=True):
        # Physical intersection of original with new
        split_box = box_intersection(destination_box, original_brick.physical_box)
        
        # Extract portion of original volume data that belongs to this new box
        split_box_internal = split_box - original_brick.physical_box[0]
        fragment_vol = extract_subvol(original_brick.volume, split_box_internal)

        # Subtract out halo to get logical_box
        new_logical_box = destination_box - (-new_grid.halo_shape, new_grid.halo_shape)

        fragment_brick = Brick(new_logical_box, split_box, fragment_vol)
        fragment_brick.compress()

        # Append key (the new_logical_box, but with a special type and hash,
        # to avoid bad collisions with the default spark hash function),
        # and new brick fragment, to be assembled into the final brick in a later stage.
        key = rt.tuple_with_hash( box_as_tuple(new_logical_box) )
        key.set_hash( hash(tuple(new_logical_box[0] / new_grid.block_shape)) )
        new_logical_boxes_and_fragments.append( (key, fragment_brick) )

    return new_logical_boxes_and_fragments
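
split_brick and assemble_brick_fragments are designed as inverses (when no halo is involved). A numpy-only sketch of that round trip on raw 2D arrays, without the Brick/RDD bookkeeping:

import numpy as np

def split_into_tiles(volume, tile_shape):
    """Split a 2D volume into (box, tile) fragments on a tile grid."""
    tiles = []
    for z in range(0, volume.shape[0], tile_shape[0]):
        for y in range(0, volume.shape[1], tile_shape[1]):
            stop = np.minimum((z + tile_shape[0], y + tile_shape[1]), volume.shape)
            tiles.append((np.array([(z, y), tuple(stop)]),
                          volume[z:stop[0], y:stop[1]].copy()))
    return tiles

def assemble_tiles(tiles, full_shape, dtype):
    """Splice (box, tile) fragments back into a single volume."""
    result = np.zeros(full_shape, dtype)
    for box, tile in tiles:
        result[box[0, 0]:box[1, 0], box[0, 1]:box[1, 1]] = tile
    return result

volume = np.random.randint(0, 10, (25, 33))
tiles = split_into_tiles(volume, (10, 10))
assert (assemble_tiles(tiles, volume.shape, volume.dtype) == volume).all()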
Example #12
    def write_subvolume(self, subvolume, offset_zyx, scale=0):
        offset_zyx = np.array(offset_zyx)
        offset_zyx -= self._global_offset // (2**scale)
        box = np.array([offset_zyx, offset_zyx + subvolume.shape])

        stored_bounding_box = (self._bounding_box_zyx -
                               self._global_offset) // (2**scale)
        if (box[0] >= 0).all() and (box[1] <= stored_bounding_box[1]).all():
            # Box is fully contained within the Zarr volume bounding box.
            self.zarr_dataset(scale)[box_to_slicing(*box)] = subvolume
        else:
            msg = (
                "Box extends beyond Zarr volume bounds (XYZ): "
                f"{box[:, ::-1].tolist()} exceeds {stored_bounding_box[:, ::-1].tolist()}"
            )

            if self._out_of_bounds_access == 'forbid':
                # Note that this message shows the true zarr storage bounds,
                # and doesn't show the logical bounds according to global_offset (if any).
                msg = "Cannot write subvolume. " + msg
                msg += "\nAdd permit-out-of-bounds to your config to allow such writes,"
                msg += " assuming the out-of-bounds portion is completely empty."
                raise RuntimeError(msg)

            clipped_box = box_intersection(box, stored_bounding_box)

            # If any of the out-of-bounds portion is non-empty, that's an error.
            subvol_copy = subvolume.copy()
            subvol_copy[box_to_slicing(*(clipped_box - box[0]))] = 0
            if self._out_of_bounds_access == 'permit-empty' and subvol_copy.any():
                # Note that this message shows the true zarr storage bounds,
                # and doesn't show the logical bounds according to global_offset (if any).
                msg = (
                    "Cannot write subvolume. Box extends beyond Zarr volume storage bounds (XYZ): "
                    f"{box[:, ::-1].tolist()} exceeds {stored_bounding_box[:, ::-1].tolist()}\n"
                    "and the out-of-bounds portion is not empty (contains non-zero values).\n"
                )
                raise RuntimeError(msg)

            logger.warning(msg)
            clipped_subvolume = subvolume[box_to_slicing(*(clipped_box - box[0]))]
            self.zarr_dataset(scale)[box_to_slicing(*clipped_box)] = clipped_subvolume
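
The 'permit-empty' branch above reduces to a single check: zero out the in-bounds region of a copy of the subvolume, and if anything nonzero remains, the caller tried to write real data out of bounds. The same check in isolation:

import numpy as np

def out_of_bounds_is_empty(subvolume, box, bounds_shape):
    """True if every out-of-bounds voxel of 'subvolume' is zero."""
    box = np.asarray(box)
    clipped = np.array([np.maximum(box[0], 0),
                        np.minimum(box[1], bounds_shape)])
    probe = subvolume.copy()
    local = clipped - box[0]
    probe[tuple(slice(a, b) for a, b in zip(*local))] = 0
    return not probe.any()

subvol = np.zeros((4, 4), np.uint8)
subvol[1, 1] = 7                      # lands in-bounds at (3, 3)
assert out_of_bounds_is_empty(subvol, [(2, 2), (6, 6)], (5, 5))
subvol[3, 3] = 9                      # lands out-of-bounds at (5, 5)
assert not out_of_bounds_is_empty(subvol, [(2, 2), (6, 6)], (5, 5))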
Example #13
def _fill_gaps(mask, mask_box, analysis_scale, dilation_radius_s0, dilation_box):
    """
    Fill gaps between segments in the mask by dilating each segment
    and keeping the voxels that were covered by more than one dilation.
    """
    # Perform light dilation on the mask to fix gaps in the
    # segmentation due to hot knife seams, downsampling, etc.
    if dilation_radius_s0 == 0:
        return mask

    # We limit the dilation repair to a central box, to avoid joining
    # dendrites that just barely enter the volume in multiple places.
    # We only want to make repairs that aren't near the volume edge.
    dilation_box = box_intersection(mask_box, dilation_box)
    if (dilation_box[1] - dilation_box[0] <= 0).any():
        return mask

    # Perform dilation on each connected component independently,
    # and mark the areas where two dilated components overlap.
    # We'll add those overlapping voxels to the mask, to span
    # small gap defects in the segmentation.
    cc = labelMultiArrayWithBackground((mask != 0).view(np.uint8))
    cc_max = cc.max()
    if cc_max <= 1:
        return mask

    central_box = dilation_box - mask_box[0]
    cc_central = cc[box_to_slicing(*central_box)]

    dilation_radius = dilation_radius_s0 // (2**analysis_scale)
    dilated_intersections = np.zeros(cc_central.shape, bool)
    dilated_all = vigra.filters.multiBinaryDilation((cc_central == 1), dilation_radius)
    for i in range(2, cc_max+1):
        cc_dilated = vigra.filters.multiBinaryDilation((cc_central == i), dilation_radius)
        dilated_intersections[:] |= (dilated_all & cc_dilated)
        dilated_all[:] |= cc_dilated

    # Return a new array; don't modify the original in-place.
    mask = mask.astype(bool, copy=True)
    mask[box_to_slicing(*central_box)] |= dilated_intersections
    return mask.view(np.uint8)
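
_fill_gaps does its labeling and dilation with vigra, but the overlap-of-dilations idea is library-agnostic. Here is the same logic sketched with scipy.ndimage instead; this is an illustration, not the author's code path.

import numpy as np
from scipy import ndimage

def fill_gaps_sketch(mask, dilation_radius):
    """
    Dilate each connected component independently and add back only the
    voxels covered by two or more dilated components (i.e. small gaps).
    """
    cc, cc_max = ndimage.label(mask != 0)
    if cc_max <= 1:
        return mask != 0

    structure = ndimage.generate_binary_structure(mask.ndim, 1)
    dilated_all = np.zeros(mask.shape, bool)
    intersections = np.zeros(mask.shape, bool)
    for i in range(1, cc_max + 1):
        cc_dilated = ndimage.binary_dilation(cc == i, structure,
                                             iterations=dilation_radius)
        intersections |= (dilated_all & cc_dilated)
        dilated_all |= cc_dilated

    return (mask != 0) | intersections

# Two segments separated by a 2px gap get bridged where dilations overlap.
mask = np.zeros((1, 10), np.uint8)
mask[0, :4] = 1
mask[0, 6:] = 1
filled = fill_gaps_sketch(mask, 2)
assert filled[0, 4] and filled[0, 5]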
Example #14
def clip_to_logical(brick, recompress=True):
    """
    Truncate the given brick so that its volume does not exceed the bounds of its logical_box.
    (Useful if the brick was originally constructed with a halo.)
    """
    intersection = box_intersection(brick.physical_box, brick.logical_box)
    assert (intersection[1] > intersection[0]).all(), \
        f"physical_box ({brick.physical_box}) does not intersect logical_box ({brick.logical_box})"

    intersection_within_physical = intersection - brick.physical_box[0]
    new_vol = brick.volume[box_to_slicing(*intersection_within_physical)]

    if recompress:
        compression = brick.compression
    else:
        compression = None

    new_brick = Brick(brick.logical_box,
                      intersection,
                      new_vol,
                      location_id=brick.location_id,
                      compression=compression)
    return new_brick
Example #15
def block_stats_from_brick(block_shape, brick):
    """
    Get the count of voxels for each segment (excluding segment 0)
    in each block within the given brick, returned as a DataFrame.
    
    Returns a DataFrame with the following columns:
        ['segment_id', 'z', 'y', 'x', 'count']
        where z,y,x are the starting coordinates of each block.
    """
    block_grid = Grid(block_shape)
    
    block_dfs = []
    block_boxes = boxes_from_grid(brick.physical_box, block_grid)
    for box in block_boxes:
        clipped_box = box_intersection(box, brick.physical_box) - brick.physical_box[0]
        block_vol = brick.volume[box_to_slicing(*clipped_box)]
        counts = pd.Series(block_vol.reshape(-1)).value_counts(sort=False)
        segment_ids = counts.index.values
        counts = counts.values.astype(np.uint32)

        box = box.astype(np.int32)

        block_df = pd.DataFrame( { 'segment_id': segment_ids,
                                   'count': counts,
                                   'z': box[0][0],
                                   'y': box[0][1],
                                   'x': box[0][2] } )

        # Exclude segment 0 from output        
        block_df = block_df[block_df['segment_id'] != 0]

        block_dfs.append(block_df)

    brick_df = pd.concat(block_dfs, ignore_index=True)
    brick_df = brick_df[['segment_id', 'z', 'y', 'x', 'count']]
    assert list(brick_df.columns) == list(BLOCK_STATS_DTYPES.keys())
    return brick_df
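
Each loop iteration above is just a value_counts over one grid-aligned sub-box. The per-block bookkeeping in isolation:

import numpy as np
import pandas as pd

def block_stats_sketch(block_vol, block_start):
    """Per-segment voxel counts for one block, excluding segment 0."""
    counts = pd.Series(block_vol.reshape(-1)).value_counts(sort=False)
    df = pd.DataFrame({'segment_id': counts.index.values,
                       'count': counts.values.astype(np.uint32),
                       'z': block_start[0],
                       'y': block_start[1],
                       'x': block_start[2]})
    return df[df['segment_id'] != 0][['segment_id', 'z', 'y', 'x', 'count']]

block = np.zeros((4, 4, 4), np.uint64)
block[0, 0, :2] = 7
stats = block_stats_sketch(block, (0, 32, 64))
assert stats['segment_id'].tolist() == [7] and stats['count'].tolist() == [2]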
Example #16
    def _init_masks(self):
        options = self.config["copysegmentation"]
        self.sbm = None

        if options["sparse-block-mask"]:
            # In theory, we could just take the intersection of the masks involved.
            # But I'm too lazy to think about that right now.
            assert not options["input-mask-labels"] and not options["output-mask-labels"], \
                "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

            with open(options["sparse-block-mask"], 'rb') as f:
                self.sbm = pickle.load(f)

        is_supervoxels = False
        if isinstance(self.input_service.base_service, DvidVolumeService):
            is_supervoxels = self.input_service.base_service.supervoxels

        output_mask_labels = load_body_list(options["output-mask-labels"],
                                            is_supervoxels)
        self.output_mask_labels = set(output_mask_labels)

        output_sbm = None
        if len(output_mask_labels) > 0:
            if (self.output_service.preferred_message_shape !=
                    self.input_service.preferred_message_shape).any():
                logger.warn(
                    "Not using output mask to reduce data fetching: Your input service and output service don't have the same brick shape"
                )
            elif (self.output_service.bounding_box_zyx !=
                  self.input_service.bounding_box_zyx).any():
                logger.warn(
                    "Not using output mask to reduce data fetching: Your input service and output service don't have the same bounding box"
                )
            else:
                try:
                    output_sbm = self.output_service.sparse_block_mask_for_labels(
                        output_mask_labels)
                except NotImplementedError:
                    output_sbm = None

        input_mask_labels = load_body_list(options["input-mask-labels"],
                                           is_supervoxels)

        input_sbm = None
        if len(input_mask_labels) > 0:
            try:
                input_sbm = self.input_service.sparse_block_mask_for_labels(
                    input_mask_labels)
            except NotImplementedError:
                input_sbm = None

        if self.sbm is not None:
            pass
        elif input_sbm is None:
            self.sbm = output_sbm
        elif output_sbm is None:
            self.sbm = input_sbm
        else:
            assert (input_sbm.resolution == output_sbm.resolution).all(), \
                "FIXME: At the moment, you can't supply both an input mask and an output "\
                "mask unless the input and output sources use the same brick shape (message-block-shape)"

            final_box = box_intersection(input_sbm.box, output_sbm.box)

            input_box = (final_box - input_sbm.box[0]) // input_sbm.resolution
            input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

            output_box = (final_box - output_sbm.box[0]) // output_sbm.resolution
            output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

            assert input_mask.shape == output_mask.shape
            assert input_mask.dtype == output_mask.dtype == bool
            final_mask = (input_mask & output_mask)

            self.sbm = SparseBlockMask(final_mask, final_box,
                                       input_sbm.resolution)

        id_offset = options["add-offset-to-ids"]
        if id_offset != 0:
            input_mask_labels = np.asarray(input_mask_labels, np.uint64)
            input_mask_labels += id_offset
        self.input_mask_labels = set(input_mask_labels)
Example #17
def stats_df_from_brick(column_names, brick, exclude_zero=True, exclude_halo=True):
    """
    For a given brick, return a DataFrame of statistics for the segments it contains.
    
    Args:
    
        column_names (list):
            Which statistics to compute. Anything from COLUMNS_INFO
            is permitted, except compressed_bytes.
            The 'segment' column must be first in the list.
        
        brick (Brick):
            The brick to process
        
        exclude_zero (bool):
            Discard statistics for segment=0.
        
        exclude_halo (bool):
            Exclude voxels that lie outside the Brick's logical_box.
    
    Returns:
        pd.DataFrame, with df.columns == column_names
    """
    import pandas as pd
    assert column_names[0] == 'segment'

    volume = brick.volume
    if exclude_halo and (brick.physical_box != brick.logical_box).any():
        internal_box = box_intersection( brick.logical_box, brick.physical_box ) - brick.physical_box[0]
        volume = volume[box_to_slicing(*internal_box)]
        volume = np.asarray(volume, order='C')

    # We always compute segment and voxel_count
    TRIVIAL_COLUMNS = set(['segment', 'voxel_count'])
    counts = pd.Series(volume.ravel('K')).value_counts(sort=False)
    segment_ids = counts.index.values
    assert segment_ids.dtype == volume.dtype
    
    # Other columns are computed only if needed
    if set(column_names) - TRIVIAL_COLUMNS:
        # Must remap to consecutive segments before calling extractRegionFeatures()
        remapped_ids = np.arange(len(segment_ids), dtype=np.uint32)
        mapper = dvidutils.LabelMapper( segment_ids, remapped_ids )
        remapped_vol = mapper.apply(volume)
        assert remapped_vol.dtype == np.uint32
        remapped_vol = vigra.taggedView( remapped_vol, 'zyx' )

        # Compute (local) bounding boxes.
        acc = vigra.analysis.extractRegionFeatures( np.zeros(remapped_vol.shape, np.float32), remapped_vol,
                                                    ["Count", "Coord<Minimum >", "Coord<Maximum >"]  )
        assert (acc["Count"] == counts.values).all()
        
        # Use int64: int32 is dangerous because multiplying them together quickly overflows
        local_bb_starts = acc["Coord<Minimum >"].astype(np.int64)
        local_bb_stops = (1 + acc["Coord<Maximum >"]).astype(np.int64)

        global_bb_starts = local_bb_starts + brick.physical_box[0]
        global_bb_stops = local_bb_stops + brick.physical_box[0]

        if 'block_list' in column_names:
            block_lists = []
            for remapped_id, start, stop in zip(remapped_ids, local_bb_starts, local_bb_stops):
                local_box = np.array((start, stop))
                binary = (remapped_vol[box_to_slicing(*local_box)] == remapped_id)
                
                # This downsample function respects block-alignment, since we're providing the local_box
                reduced, block_bb = downsample_binary_3d_suppress_zero(binary, BLOCK_WIDTH, local_box)
                
                local_block_indexes = np.transpose(reduced.nonzero())
                local_block_starts = BLOCK_WIDTH * (block_bb[0] + local_block_indexes)
                global_block_starts = brick.physical_box[0] + local_block_starts
                block_lists.append(global_block_starts)
    
    # Segment is always first.
    df = pd.DataFrame(columns=column_names)
    df['segment'] = segment_ids

    # Append columns in-order
    for column in column_names:
        if column == 'voxel_count':
            df['voxel_count'] = counts.values
        
        if column == 'block_list':
            df['block_list'] = block_lists
        
        if column == 'bounding_box_start':
            df['bounding_box_start'] = list(global_bb_starts) # Must convert to list or pandas complains about non-1D-data.
        
        if column == 'bounding_box_stop':
            df['bounding_box_stop'] = list(global_bb_stops) # ditto

        if column in ('z0', 'y0', 'x0'):
            df[column] = global_bb_starts[:, ('z0', 'y0', 'x0').index(column)]

        if column in ('z1', 'y1', 'x1'):
            df[column] = global_bb_stops[:, ('z1', 'y1', 'x1').index(column)]
        
        if column == 'compressed_bytes':
            raise RuntimeError("Can't compute compressed_bytes in this function.")

    if exclude_zero:
        df.drop(df.index[df.segment == 0], inplace=True)

    return df
Example #18
    def execute(self):
        self._init_service()
        options = self.config["roistats"]

        if not options["roi-server"]:
            assert isinstance(self.input_service, DvidVolumeService)
            options["roi-server"] = self.input_service.base_service.server

        if not options["roi-uuid"]:
            assert isinstance(self.input_service, DvidVolumeService)
            options["roi-uuid"] = self.input_service.base_service.uuid

        options["roi-uuid"] = resolve_ref(options["roi-server"],
                                          options["roi-uuid"])

        is_supervoxels = (isinstance(self.input_service, DvidVolumeService)
                          and self.input_service.base_service.supervoxels
                          )  # noqa
        bodies = load_body_list(options["subset-bodies"], is_supervoxels)
        assert len(
            bodies) > 0, "Please provide a list of subset-bodies to process"

        scale = options["analysis-scale"]
        bounding_box = self.input_service.bounding_box_zyx
        assert not (bounding_box % 2**5).any(), \
            "Make sure your configured bounding box is divisible by 32px at scale 0."
        brick_shape = self.input_service.preferred_message_shape
        assert not (brick_shape % 2**5).any(), \
            "Make sure your preferred message shape divides into 32px blocks at scale 0"

        with Timer("Fetching ROI volume", logger):
            roi_vol_s5, roi_box_s5, overlaps = fetch_combined_roi_volume(
                options["roi-server"], options["roi-uuid"], options["rois"],
                False, bounding_box // 2**5)

        if len(overlaps) > 0:
            logger.warn(
                f"Some of your ROIs overlap!  Here's an incomplete list:\n{overlaps}"
            )

        with Timer("Determining brick set", logger):
            # Determine which bricks intersect our ROIs
            roi_brick_shape = self.input_service.preferred_message_shape // 2**5
            roi_brick_boxes = boxes_from_mask((roi_vol_s5 != 0),
                                              roi_box_s5[0],
                                              roi_brick_shape,
                                              clipped=False)
            roi_brick_boxes *= 2**5
            roi_brick_boxes = box_intersection(
                roi_brick_boxes, self.input_service.bounding_box_zyx)

            # Non-intersecting boxes have negative shape -- drop them.
            roi_brick_boxes = roi_brick_boxes[(
                (roi_brick_boxes[:, 1, :] - roi_brick_boxes[:, 0, :]) > 0).all(
                    axis=1)]
            roi_brick_coords_df = pd.DataFrame(roi_brick_boxes[:, 0, :],
                                               columns=[*'zyx'])
            try:
                body_brick_coords_df = self.input_service.sparse_brick_coords_for_labels(
                    bodies)
            except NotImplementedError:
                # Use all bricks in the ROIs, and use the special label -1 to
                # indicate that all bodies in the list might be found there.
                # (See below.)
                brick_coords_df = roi_brick_coords_df
                brick_coords_df['label'] = -1
            else:
                brick_coords_df = body_brick_coords_df.merge(
                    roi_brick_coords_df, 'inner', on=[*'zyx'])

            assert brick_coords_df.columns.tolist() == [*'zyx', 'label']
            np.save('brick-coords.npy',
                    brick_coords_df.to_records(index=False))

        with Timer("Preparing bricks", logger):
            boxes_and_roi_bricks = []
            for coord, brick_labels in brick_coords_df.groupby(
                [*'zyx'])['label'].agg(tuple).iteritems():
                if brick_labels == (-1, ):
                    # No sparse body brick locations were found above.
                    # Search for all bodies in all bricks.
                    brick_labels = bodies

                box = np.array((coord, coord))
                box[1] += brick_shape
                box = box_intersection(box, bounding_box)

                roi_brick_box = ((box // 2**5) - roi_box_s5[0])
                roi_brick_s5 = extract_subvol(roi_vol_s5, roi_brick_box)
                boxes_and_roi_bricks.append((box, roi_brick_s5, brick_labels))

        scaled_shape = brick_shape // (2**scale)
        logger.info(
            f"Prepared {len(boxes_and_roi_bricks)} bricks of scale-0 shape "
            f"{(*brick_shape[::-1],)} ({(*scaled_shape[::-1],)} at scale-{scale})"
        )

        all_stats = []
        batches = [*iter_batches(boxes_and_roi_bricks, options["batch-size"])]
        logger.info(f"Processing {len(batches)} batches")
        for i, batch_boxes_and_bricks in enumerate(batches):
            with Timer(f"Batch {i:02d}", logger):
                batch_stats = self._execute_batch(scale,
                                                  batch_boxes_and_bricks)
                all_stats.append(batch_stats)

        all_stats = pd.concat(all_stats, ignore_index=True)
        all_stats = all_stats.groupby(['body', 'roi_id'],
                                      as_index=False)['voxels'].sum()

        roi_names = pd.Series(["<none>", *options["rois"]], name='roi')
        roi_names.index.name = 'roi_id'
        all_stats = all_stats.merge(roi_names, 'left', on='roi_id')
        all_stats = all_stats.sort_values(['body', 'roi_id'])

        if scale > 0:
            all_stats.rename(columns={'voxels': f'voxels_s{scale}'},
                             inplace=True)

        with Timer(f"Writing stats ({len(all_stats)} rows)", logger):
            np.save('roi-stats.npy', all_stats.to_records(index=False))
            all_stats.to_csv('roi-stats.csv', index=False, header=True)
Example #19
    def execute(self):
        self._sanitize_config()
        config = self.config_data
        options = config["options"]

        resource_mgr_client = ResourceManagerClient(options["resource-server"], options["resource-port"])
        volume_service = VolumeService.create_from_config(config["input"], self.config_dir, resource_mgr_client)

        input_csv = config["options"]["input-table"]
        with Timer(f"Reading {input_csv}", logger):
            coordinate_table_df = pd.read_csv(input_csv, header=0, dtype=CSV_TYPES)
            points = coordinate_table_df[['z', 'y', 'x']].values

        rescale = config["options"]["rescale-points-to-level"]
        if rescale != 0:
            points //= 2**rescale

        # All points must lie within the input volume        
        points_box = [points.min(axis=0), 1+points.max(axis=0)]
        if (box_intersection(points_box, volume_service.bounding_box_zyx) != points_box).any():
            raise RuntimeError("The point list includes points outside of the volume bounding box.")

        with Timer("Sorting points by Brick ID", logger):
            # 'Brick ID' is defined as the divided corner coordinate 
            brick_shape = volume_service.preferred_message_shape
            brick_ids_and_points = np.concatenate( (points // brick_shape, points), axis=1 )
            brick_ids_and_points = lexsort_columns(brick_ids_and_points)

            brick_ids = brick_ids_and_points[:, :3]
            points = brick_ids_and_points[:, 3:]
            
            # Extract the first row of each group to get the set of unique brick IDs
            point_group_spans = groupby_spans_presorted(brick_ids)
            point_group_starts = (start for start, stop in point_group_spans)
            unique_brick_ids = brick_ids[np.fromiter(point_group_starts, np.int32)]

        with Timer("Distributing points", logger):
            # This is faster than pandas.DataFrame.groupby() for large data
            point_groups = groupby_presorted(points, brick_ids)
            id_and_ptgroup = self.sc.parallelize(zip(map(tuple, unique_brick_ids), point_groups))
        
        with Timer("Constructing sparse mask", logger):
            # BrickWall.from_volume_service() supports the ability to initialize a sparse RDD,
            # with only a subset of Bricks (rather than a dense RDD containing every brick
            # within the volume bounding box).
            # It requires a SparseBlockMask object indicating exactly which Bricks need to be fetched.
            brick_mask_box = np.array([unique_brick_ids.min(axis=0), 1+unique_brick_ids.max(axis=0)])

            brick_mask_shape = (brick_mask_box[1] - brick_mask_box[0])
            brick_mask = np.zeros(brick_mask_shape, bool)
            brick_mask_coords = unique_brick_ids - brick_mask_box[0]
            brick_mask[tuple(brick_mask_coords.transpose())] = True
            sbm = SparseBlockMask(brick_mask, brick_mask_box*brick_shape, brick_shape)

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB RDD partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes
            brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.sc, target_partition_size_voxels, sbm, lazy=True)
        
        with Timer("Joining point groups with bricks", logger):
            id_and_brick = brickwall.bricks.map(lambda brick: (tuple(brick.logical_box[0] // brick_shape), brick))
            brick_and_ptgroup = id_and_brick.join(id_and_ptgroup).values() # discard id

        def sample_points(brick_and_points):
            """
            Given a Brick and array of points (N,3) that lie within it,
            sample labels from the points within the brick and return
            a record array containing the points and the sampled labels.
            """
            brick, points = brick_and_points

            result_dtype = [('z', np.int32), ('y', np.int32), ('x', np.int32), ('label', np.uint64)]
            result = np.zeros((len(points),), result_dtype)
            result['z'] = points[:,0]
            result['y'] = points[:,1]
            result['x'] = points[:,2]

            # Make relative to brick offset
            points -= brick.physical_box[0]
            
            result['label'] = brick.volume[tuple(points.transpose())]
            return result

        with Timer("Sampling bricks", logger):
            brick_samples = brick_and_ptgroup.map(sample_points).collect()

        with Timer("Concatenating samples", logger):
            sample_table = np.concatenate(brick_samples)

        with Timer("Sorting samples", logger):
            sample_table.sort()

        with Timer("Sorting table", logger):
            if rescale == 0:
                coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)
            else:
                # sample_table is sorted by RESCALED coordinate,
                # so sort our table the same way
                coordinate_table_df['rz'] = coordinate_table_df['z'] // (2**rescale)
                coordinate_table_df['ry'] = coordinate_table_df['y'] // (2**rescale)
                coordinate_table_df['rx'] = coordinate_table_df['x'] // (2**rescale)
                coordinate_table_df.sort_values(['rz', 'ry', 'rx'], inplace=True)
                del coordinate_table_df['rz']
                del coordinate_table_df['ry']
                del coordinate_table_df['rx']

        # Now that samples and input rows are sorted identically,
        # append the results
        output_col = options["output-column"]
        coordinate_table_df[output_col] = sample_table['label']

        with Timer("Exporting samples", logger):
            coordinate_table_df.to_csv(config["options"]["output-table"], header=True, index=False)

        logger.info("DONE.")
Example #20
    def init_boxes(self, volume_service, subset_labels, roi):
        sbm = None
        if roi:
            base_service = volume_service.base_service
            assert isinstance(base_service, DvidVolumeService), \
                "Can't specify an ROI unless you're using a dvid input"

            assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
                "The 'roi' option doesn't support adapters other than 'rescale-level'"
            scale = 0
            if isinstance(volume_service, ScaledVolumeService):
                scale = volume_service.scale_delta
                assert scale <= 5, \
                    "The 'roi' option doesn't support volumes downscaled beyond level 5"

            server, uuid, _seg_instance = base_service.instance_triple

            brick_shape = volume_service.preferred_message_shape
            assert not (brick_shape % 2**(5-scale)).any(), \
                "If using an ROI, select a brick shape that is divisible by 32"

            seg_box = volume_service.bounding_box_zyx
            seg_box = round_box(seg_box, brick_shape)
            seg_box_s5 = seg_box // 2**(5 - scale)

            with Timer(f"Fetching mask for ROI '{roi}'", logger):
                roi_mask_s5, roi_box_s5 = fetch_roi(server,
                                                    uuid,
                                                    roi,
                                                    format='mask')

            # Restrict to input bounding box
            clipped_roi_box_s5 = box_intersection(seg_box_s5, roi_box_s5)
            clipped_roi_mask_s5 = extract_subvol(
                roi_mask_s5, clipped_roi_box_s5 - roi_box_s5[0])

            # Align to brick grid
            aligned_roi_box_s5 = round_box(clipped_roi_box_s5,
                                           brick_shape // 2**5, 'out')
            padding = (aligned_roi_box_s5 - clipped_roi_box_s5)
            padding[0] *= -1
            aligned_roi_mask_s5 = np.pad(clipped_roi_mask_s5,
                                         padding.transpose())

            # At the service native scale
            aligned_roi_box = (2**(5 - scale) * aligned_roi_box_s5)
            logger.info(
                f"Brick-aligned ROI '{roi}' has bounding-box {aligned_roi_box[:, ::-1].tolist()}"
            )

            # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
            sbm = SparseBlockMask.create_from_highres_mask(
                aligned_roi_mask_s5, 2**(5 - scale), aligned_roi_box,
                brick_shape)
        elif subset_labels:
            try:
                sbm = volume_service.sparse_block_mask_for_labels(
                    [*subset_labels])
                if ((sbm.box[1] - sbm.box[0]) == 0).any():
                    raise RuntimeError(
                        "Could not find sparse masks for any of the subset-labels"
                    )
            except NotImplementedError:
                sbm = None

        if sbm is None:
            boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                    volume_service.preferred_message_shape,
                                    clipped=True)
            return np.array([*boxes])
        else:
            # 'brick_shape' was only defined in the ROI branch above; use the
            # service's preferred shape so the subset-labels path works, too.
            brick_shape = volume_service.preferred_message_shape
            boxes = sbm.sparse_boxes(brick_shape)
            boxes = np.array(boxes)

            # Clip
            boxes[:, 0, :] = np.maximum(volume_service.bounding_box_zyx[0],
                                        boxes[:, 0, :])
            boxes[:, 1, :] = np.minimum(volume_service.bounding_box_zyx[1],
                                        boxes[:, 1, :])
            assert (boxes[:,0,:] < boxes[:,1,:]).all(), \
                "After cropping to input volume, some bricks disappeared."

            return boxes
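
The clipping step above vectorizes over the entire (N, 2, D) array of boxes at once, which is why no Python loop is needed:

import numpy as np

boxes = np.array([[(-5, 0), (10, 10)],
                  [(20, 20), (40, 40)]])
bounding_box = np.array([(0, 0), (32, 32)])

boxes[:, 0, :] = np.maximum(bounding_box[0], boxes[:, 0, :])
boxes[:, 1, :] = np.minimum(bounding_box[1], boxes[:, 1, :])
assert (boxes == [[(0, 0), (10, 10)], [(20, 20), (32, 32)]]).all()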
Example #21
def test_extract_halos():
    halo = 1
    grid = Grid( (10,20), (0,0), halo )
    bounding_box = np.array([(15,30), (95,290)])
    volume = np.random.randint(0,10, (100,300) )

    bricks, _num_bricks = generate_bricks_from_volume_source( bounding_box, grid, partial(extract_subvol, volume), DebugClient() )

    outer_halos = extract_halos(bricks, grid, 'outer').compute()
    inner_halos = extract_halos(bricks, grid, 'inner').compute()

    for halo_type, halo_bricks in zip(('outer', 'inner'), (outer_halos, inner_halos)):
        for hb in halo_bricks:
            # Even bricks on the edge of the volume
            # (which have smaller physical boxes than logical boxes)
            # return halos which correspond to the original
            # logical box (except for the halo axis).
            # (Each halo's "logical box" still corresponds to
            # the brick it was extracted from.)
            if halo_type == 'outer':
                assert (hb.physical_box[0] != hb.logical_box[0]).sum() == 1
                assert (hb.physical_box[1] != hb.logical_box[1]).sum() == 1
            else:
                assert (hb.physical_box != hb.logical_box).sum() == 1

            # The bounding box above is not grid aligned,
            # so blocks on the volume edge will only have partial data
            # (i.e. a smaller physical_box than logical_box)
            # However, halos are always produced to correspond to the logical_box size,
            # and zero-padded if necessary to achieve that size.
            # Therefore, only compare the actually valid portion of the halo here with the expected volume.
            # The other voxels should be zeros.
            valid_box = box_intersection(bounding_box, hb.physical_box)
            halo_vol = extract_subvol(hb.volume, valid_box - hb.physical_box[0])
            expected_vol = extract_subvol(volume, valid_box)
            assert (halo_vol == expected_vol).all()
            
            # Other voxels should be zero
            full_halo_vol = hb.volume.copy()
            overwrite_subvol(full_halo_vol, valid_box - hb.physical_box[0], 0)
            assert (full_halo_vol == 0).all()

    rows = []
    for hb in chain(outer_halos):
        rows.append([*hb.physical_box.flat, hb, 'outer'])

    for hb in chain(inner_halos):
        rows.append([*hb.physical_box.flat, hb, 'inner'])
    
    halo_df = pd.DataFrame(rows, columns=['y0', 'x0', 'y1', 'x1', 'brick', 'halo_type'])
    
    halo_counts = halo_df.groupby(['y0', 'x0', 'y1', 'x1']).size()

    # Since the bricks' physical boxes are all clipped to the overall bounding-box,
    # every outer halo should have a matching inner halo from a neighboring brick.
    # (This would not necessarily be true for Bricks that are initialized from a sparse mask.)
    assert halo_counts.min() == 2
    assert halo_counts.max() == 2
    
    for _box, halos_df in halo_df.groupby(['y0', 'x0', 'y1', 'x1']):
        assert set(halos_df['halo_type']) == set(['outer', 'inner'])

        brick0 = halos_df.iloc[0]['brick']
        brick1 = halos_df.iloc[1]['brick']
        assert (brick0.volume == brick1.volume).all()
Example #22
def stats_df_from_brick(column_names,
                        brick,
                        exclude_zero=True,
                        exclude_halo=True):
    """
    For a given brick, return a DataFrame of statistics for the segments it contains.
    
    Args:
    
        column_names (list):
            Which statistics to compute. Anything from COLUMNS_INFO
            is permitted, except compressed_bytes.
            The 'segment' column must be first in the list.
        
        brick (Brick):
            The brick to process
        
        exclude_zero (bool):
            Discard statistics for segment=0.
        
        exclude_halo (bool):
            Exclude voxels that lie outside the Brick's logical_box.
    
    Returns:
        pd.DataFrame, with df.columns == column_names
    """
    import pandas as pd
    assert column_names[0] == 'segment'

    volume = brick.volume
    if exclude_halo and (brick.physical_box != brick.logical_box).any():
        internal_box = box_intersection(
            brick.logical_box, brick.physical_box) - brick.physical_box[0]
        volume = volume[box_to_slicing(*internal_box)]
        volume = np.asarray(volume, order='C')

    # We always compute segment and voxel_count
    TRIVIAL_COLUMNS = set(['segment', 'voxel_count'])
    counts = pd.Series(volume.ravel('K')).value_counts(sort=False)
    segment_ids = counts.index.values
    assert segment_ids.dtype == volume.dtype

    # Other columns are computed only if needed
    if set(column_names) - TRIVIAL_COLUMNS:
        # Must remap to consecutive segments before calling extractRegionFeatures()
        remapped_ids = np.arange(len(segment_ids), dtype=np.uint32)
        mapper = dvidutils.LabelMapper(segment_ids, remapped_ids)
        remapped_vol = mapper.apply(volume)
        assert remapped_vol.dtype == np.uint32
        remapped_vol = vigra.taggedView(remapped_vol, 'zyx')

        # Compute (local) bounding boxes.
        acc = vigra.analysis.extractRegionFeatures(
            np.zeros(remapped_vol.shape, np.float32), remapped_vol,
            ["Count", "Coord<Minimum >", "Coord<Maximum >"])
        assert (acc["Count"] == counts.values).all()

        # Use int64: int32 is dangerous because multiplying them together quickly overflows
        local_bb_starts = acc["Coord<Minimum >"].astype(np.int64)
        local_bb_stops = (1 + acc["Coord<Maximum >"]).astype(np.int64)

        global_bb_starts = local_bb_starts + brick.physical_box[0]
        global_bb_stops = local_bb_stops + brick.physical_box[0]

        if 'block_list' in column_names:
            block_lists = []
            for remapped_id, start, stop in zip(remapped_ids, local_bb_starts,
                                                local_bb_stops):
                local_box = np.array((start, stop))
                binary = (remapped_vol[box_to_slicing(
                    *local_box)] == remapped_id)

                # This downsample function respects block-alignment, since we're providing the local_box
                reduced, block_bb = downsample_binary_3d_suppress_zero(
                    binary, BLOCK_WIDTH, local_box)

                local_block_indexes = np.transpose(reduced.nonzero())
                local_block_starts = BLOCK_WIDTH * (block_bb[0] +
                                                    local_block_indexes)
                global_block_starts = brick.physical_box[0] + local_block_starts
                block_lists.append(global_block_starts)

    # Segment is always first.
    df = pd.DataFrame(columns=column_names)
    df['segment'] = segment_ids

    # Append columns in-order
    for column in column_names:
        if column == 'voxel_count':
            df['voxel_count'] = counts.values

        if column == 'block_list':
            df['block_list'] = block_lists

        if column == 'bounding_box_start':
            df['bounding_box_start'] = list(
                global_bb_starts
            )  # Must convert to list or pandas complains about non-1D-data.

        if column == 'bounding_box_stop':
            df['bounding_box_stop'] = list(global_bb_stops)  # ditto

        if column in ('z0', 'y0', 'x0'):
            df[column] = global_bb_starts[:, ('z0', 'y0', 'x0').index(column)]

        if column in ('z1', 'y1', 'x1'):
            df[column] = global_bb_stops[:, ('z1', 'y1', 'x1').index(column)]

        if column == 'compressed_bytes':
            raise RuntimeError(
                "Can't compute compressed_bytes in this function.")

    if exclude_zero:
        df.drop(df.index[df.segment == 0], inplace=True)

    return df
Example #23
    def execute(self):
        scale = self._init_service()

        options = self.config["roistats"]
        server = self.input_service.base_service.server
        uuid = self.input_service.base_service.uuid
        rois = options["rois"]

        bodies = load_body_list(options["subset-bodies"],
                                self.input_service.base_service.supervoxels)
        assert len(
            bodies) > 0, "Please provide a list of subset-bodies to process"

        bounding_box = self.input_service.bounding_box_zyx
        assert not (bounding_box % 2**(5-scale)).any(), \
            "Make sure your configured bounding box is divisible by 32px at scale 0"
        brick_shape = self.input_service.preferred_message_shape
        assert not (brick_shape % 2**(5-scale)).any(), \
            "Make sure your preferred message shape divides into 32px blocks at scale 0"

        with Timer("Fetching ROI volume", logger):
            roi_vol_s5, roi_box_s5, overlaps = fetch_combined_roi_volume(
                server, uuid, rois, False, bounding_box // 2**(5 - scale))

        if len(overlaps) > 0:
            logger.warn(
                f"Some of your ROIs overlap!  Here's an incomplete list:\n{overlaps}"
            )

        with Timer("Determining brick set", logger):
            brick_coords_df = self.input_service.sparse_brick_coords_for_labels(
                bodies)
            np.save('brick-coords.npy',
                    brick_coords_df.to_records(index=False))

        with Timer(f"Preparing bricks", logger):
            boxes_and_roi_bricks = []
            for coord, labels in brick_coords_df.groupby(
                [*'zyx'])['label'].agg(tuple).iteritems():
                box = np.array((coord, coord))
                box[1] += brick_shape
                box = box_intersection(box, bounding_box)

                roi_brick_box = ((box // 2**(5 - scale)) - roi_box_s5[0])
                roi_brick_s5 = extract_subvol(roi_vol_s5, roi_brick_box)
                boxes_and_roi_bricks.append((box, roi_brick_s5, labels))

        logger.info(
            f"Prepared {len(boxes_and_roi_bricks)} bricks of shape {(*brick_shape[::-1],)}"
        )

        all_stats = []
        batches = [*iter_batches(boxes_and_roi_bricks, options["batch-size"])]
        logger.info(f"Processing {len(batches)} batches")
        for i, batch_boxes_and_bricks in enumerate(batches):
            with Timer(f"Batch {i:02d}", logger):
                batch_stats = self._execute_batch(scale,
                                                  batch_boxes_and_bricks)
                all_stats.append(batch_stats)

        all_stats = pd.concat(all_stats, ignore_index=True)
        all_stats = all_stats.groupby(['body', 'roi_id'],
                                      as_index=False)['voxels'].sum()

        roi_names = pd.Series(["<none>", *rois], name='roi')
        roi_names.index.name = 'roi_id'
        all_stats = all_stats.merge(roi_names, 'left', on='roi_id')
        all_stats = all_stats.sort_values(['body', 'roi_id'])

        if scale > 0:
            all_stats.rename(columns={'voxels': f'voxels_s{scale}'},
                             inplace=True)

        with Timer(f"Writing stats ({len(all_stats)} rows)", logger):
            np.save('roi-stats.npy', all_stats.to_records(index=False))
            all_stats.to_csv('roi-stats.csv', index=False, header=True)
Example #24
def _measure_tbar_mito_distances(seg_src, mito_src, body, tbar_points_s0,
                                 primary_point_index, radius_s0, scale,
                                 mito_min_size_s0, mito_scale_offset):
    """
    Download the segmentation for a single body around one tbar point as a mask,
    and also the corresponding mitochondria mask for those voxels.
    Then, for each tbar point in the region (not just the one tbar we chose as
    our focal point), compute the minimum distance to any mitochondria voxel.

    Not all of the computed distances are used, however.
    Only the results for tbars which are closer to their nearest mitochondria
    than they are to the subvolume edge can be trusted.

    The results are written into the columns of tbar_points_s0.
    Points for which a mito was found are marked as 'done', and the
    mito-distance is recorded. Also, the closest point in the mito is stored
    in the mito-x/y/z columns.

    Args:
        seg_src:
            (server, uuid, instance) OR a flyemflows VolumeService
            Labelmap instance for the neuron segmentation.
        mito_src:
            (server, uuid, instance) OR a flyemflows VolumeService
            Labelmap instance for the mitochondria "mask"
            (actually a segmentation with a few classes).
        body:
            The body ID on which the tbars reside.
        tbar_points_s0:
            DataFrame with ALL tbar coordinates you plan to analyze.
            The coordinates should be specified at scale 0,
            even if you are specifying a different scale to use for the analysis.
            We update the row of the "primary" point, but we also update any
            other rows we can, since the mask we download might happen to
            catch other tbars, too.
        primary_point_index:
            An index value, indicating which row of tbar_points_s0 should be
            the "primary" point around which the body/mito masks are downloaded.
        radius_s0:
            The radius of segmentation around the "primary" point to fetch and
            analyze for mito-tbar distances. Specified at scale 0, regardless of
            the scale you want to be used for performing the analysis.
        scale:
            To save time and RAM, it's faster to perform the analysis using a
            lower resolution.  Specify which scale to use.
        mito_min_size_s0:
            Mito mask voxels that fall outside the body mask will be discarded,
            and then the mito mask is segmented via a connected components step.
            Components below this size threshold will be discarded before
            distances are computed.  Specify this threshold in units of scale 0
            voxels, regardless of the scale at which the analysis is being performed.
        mito_scale_offset:
            If the mito mask layer is stored at a lower resolution than the
            neuron segmentation, specify the difference between the two scales
            using this parameter. (It's assumed that the scales differ by a power of two.)
            For instance, if the segmentation is stored at 8nm resolution,
            but the mito masks are stored at 16nm resolution, use mito_scale_offset=1.

    Returns:
        The number of tbars for which a nearby mitochondrion was found in this batch.
        (Where "batch" is the set of not-yet-done tbars that overlap with the body mask
        near the "primary" tbar point.)
    """
    assert not tbar_points_s0['done'].loc[primary_point_index]
    primary_point_s0 = tbar_points_s0[[*'zyx']].loc[primary_point_index].values
    batch_tbars = tbar_points_s0.copy()

    # Adjust for scale
    primary_point = np.asarray(primary_point_s0) // (2**scale)
    mito_min_size = mito_min_size_s0 // ((2**scale)**3)
    radius = radius_s0 // (2**scale)
    batch_tbars[[*'zyx']] //= (2**scale)

    body_mask, mask_box, body_block_corners = _fetch_body_mask(
        seg_src, primary_point, radius, scale, body,
        batch_tbars[[*'zyx']].values)
    mito_mask = _fetch_mito_mask(mito_src, body_mask, mask_box,
                                 body_block_corners, scale, mito_min_size,
                                 mito_scale_offset)

    if EXPORT_DEBUG_VOLUMES:
        print(
            f"Primary point in the local volume is: {(primary_point - mask_box[0])[::-1]}"
        )
        np.save('/tmp/body_mask.npy', 1 * body_mask.astype(np.uint64))
        np.save('/tmp/mito_mask.npy', 2 * mito_mask.astype(np.uint64))

    if (body_mask & mito_mask).sum() == 0:
        # The body mask contains no mitochondria at all.
        if (body_mask[0, :, :].any() or body_mask[-1, :, :].any()
                or body_mask[:, 0, :].any() or body_mask[:, -1, :].any()
                or body_mask[:, :, 0].any() or body_mask[:, :, -1].any()):
            # The body mask touches the edge of the volume,
            # so we should expand our radius and keep trying.
            return 0
        else:
            # Doesn't touch volume edges.
            # We're done with it, even though we can't find a mito.
            tbar_points_s0.loc[primary_point_index, 'done'] = True
            return 1

    # Find the set of all points that fall within the mask.
    # That's the batch of tbars we'll find mito distances for.
    batch_tbars = batch_tbars.query('not done')

    in_box = (batch_tbars[[*'zyx']] >= mask_box[0]).all(
        axis=1) & (batch_tbars[[*'zyx']] < mask_box[1]).all(axis=1)
    batch_tbars = batch_tbars.loc[in_box]

    tbars_local = batch_tbars[[*'zyx']] - mask_box[0]
    in_mask = body_mask[tuple(tbars_local.values.transpose())]
    batch_tbars = batch_tbars.iloc[in_mask]
    assert len(batch_tbars) >= 1

    with Timer(f"Calculating distances for batch of {len(batch_tbars)} points",
               logger):
        tbars_local = batch_tbars[[*'zyx']] - mask_box[0]
        distances, mito_points_local = _calc_distances(body_mask, mito_mask,
                                                       tbars_local.values)

    mito_points = mito_points_local + mask_box[0]
    batch_tbars['mito-distance'] = distances
    batch_tbars.loc[:, ['mito-z', 'mito-y', 'mito-x']] = mito_points

    batch_cube = [primary_point - radius, primary_point + radius + 1]

    valid_rows = []
    for i in batch_tbars.index:
        # If we found a mito for this tbar, we can only keep it if
        # the tbar is closer to the mito than it is to the edge of
        # the mask volume. Otherwise, we can't guarantee that this
        # mito is the globally closest mito to the tbar.  (There
        # could be one just outside the mask subvolume that is
        # closer.)

        # Define a box (cube) around the point,
        # whose radius is the mito distance.
        p = batch_tbars[[*'zyx']].loc[i].values
        d = batch_tbars['mito-distance'].loc[i]
        p_cube = [p - d, p + d + 1]

        # If the cube around our point doesn't exceed the box that was
        # searched for this batch, we can believe this mito distance.
        if (p_cube == box_intersection(p_cube, batch_cube)).all():
            valid_rows.append(i)

    logger.info(
        f"Kept {len(valid_rows)}/{len(batch_tbars)} mito distances (R={radius_s0})"
    )
    batch_tbars = batch_tbars.loc[valid_rows]

    # Update the input DataFrame (and rescale)
    tbar_points_s0.loc[
        batch_tbars.index,
        'mito-distance'] = (2**scale) * batch_tbars['mito-distance']
    tbar_points_s0.loc[batch_tbars.index, ['mito-z', 'mito-y', 'mito-x']] = (
        2**scale) * batch_tbars[['mito-z', 'mito-y', 'mito-x']]
    tbar_points_s0.loc[batch_tbars.index, 'done'] = True

    return len(batch_tbars)
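
# Illustrative sketch: the trust criterion above can be tested in isolation.
# A tbar's mito distance is kept only if a cube of that radius around the tbar
# fits entirely inside the searched cube. Below, a minimal stand-in for the
# box_intersection() helper assumed by this code:
import numpy as np

def box_intersection(box_a, box_b):
    box_a, box_b = np.asarray(box_a), np.asarray(box_b)
    return np.array([np.maximum(box_a[0], box_b[0]),
                     np.minimum(box_a[1], box_b[1])])

primary_point = np.array([100, 100, 100])
radius = 50
batch_cube = [primary_point - radius, primary_point + radius + 1]

# A tbar near the center with a small mito distance is trusted...
p, d = np.array([110, 100, 95]), 20
p_cube = [p - d, p + d + 1]
assert (p_cube == box_intersection(p_cube, batch_cube)).all()

# ...but one whose distance-cube pokes outside the searched region is not.
p, d = np.array([140, 100, 100]), 20
p_cube = [p - d, p + d + 1]
assert not (p_cube == box_intersection(p_cube, batch_cube)).all()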
Example #25
def assemble_brick_fragments(fragments, output_accessor_fn=None):
    """
    Given a list of Bricks with identical logical_boxes, splice their volumes
    together into a final Brick that contains a full volume containing all of
    the fragments.

    Note:
        Brick 'fragments' are also just Bricks, whose physical_box does
        not cover the entire logical_box for the brick.
        Each fragment's physical_box indicates where that fragment's data
        should be located within the final returned Brick.

    Args:
        fragments:
            An iterable of Bricks, all sharing the same logical_box.
            Each fragment's physical_box determines where its data will
            be placed within the assembled result.

        output_accessor_fn:
            Callable with signature: f(box) -> ndarray
            If provided, used to fetch pre-existing data for any portion of
            the final physical box that the fragments don't cover.

    Returns:
        A Brick containing the data from all fragments,
        UNLESS the fully assembled fragments would not intersect
        with the Brick's own logical_box (i.e. all fragments fall
        within the halo), in which case None is returned.

    Note:
        If the fragment physical_boxes are not disjoint, the results
        are undefined.  That is, if two fragments overlap, there's
        no guarantee about which one "wins" for the overlapping region.
    """
    fragments = list(fragments)

    # All logical boxes must be the same
    logical_boxes = np.asarray([frag.logical_box for frag in fragments])
    assert (logical_boxes == logical_boxes[0]).all(), \
        "Cannot assemble brick fragments from different logical boxes. "\
        "They belong to different bricks!"
    final_logical_box = fragments[0].logical_box
    final_location_id = fragments[0].location_id

    # The final physical box is the min/max of all fragment physical extents.
    physical_boxes = np.array([frag.physical_box for frag in fragments])
    assert physical_boxes.ndim == 3  # (N, 2, Dim)
    assert physical_boxes.shape == (len(fragments), 2,
                                    final_logical_box.shape[1])

    final_physical_box = np.asarray((np.min(physical_boxes[:, 0, :], axis=0),
                                     np.max(physical_boxes[:, 1, :], axis=0)))

    intersects_interior = False
    for frag_pbox in physical_boxes:
        interior_box = box_intersection(frag_pbox, final_logical_box)
        if (interior_box[1] - interior_box[0] > 0).all():
            intersects_interior = True
            break

    if not intersects_interior:
        # All fragments lie completely within the halo;
        # none intersect with the interior logical_box,
        # so we don't bother keeping this brick.
        return None

    final_volume_shape = final_physical_box[1] - final_physical_box[0]
    dtype = fragments[0].volume.dtype

    # If the physical boxes don't completely fill the final_box,
    # then we will need to use the output_accessor_fn (if given).
    if output_accessor_fn is None or is_box_coverage_complete(
            physical_boxes, final_physical_box):
        final_volume = np.zeros(final_volume_shape, dtype)
    else:
        final_volume = output_accessor_fn(final_physical_box)

    for frag in fragments:
        internal_box = frag.physical_box - final_physical_box[0]
        overwrite_subvol(final_volume, internal_box, frag.volume)

        # Recompress fragment now that we're done with it.
        frag.compress()

        ## It's tempting to destroy the fragment to save RAM,
        ## but the fragment might be needed by more than one final brick.
        ## (Also, it might be needed twice if a Worker gets restarted.)
        # frag.destroy()

    compression = fragments[0].compression
    brick = Brick(final_logical_box,
                  final_physical_box,
                  final_volume,
                  location_id=final_location_id,
                  compression=compression)
    brick.compress()
    return brick
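
# Illustrative sketch: the splicing step above is offset copying into a volume
# spanning the min/max of the fragment extents. A self-contained example with
# two hypothetical fragments and a stand-in for the overwrite_subvol() helper
# assumed by this code:
import numpy as np

def overwrite_subvol(volume, box, subvol):
    (z0, y0, x0), (z1, y1, x1) = box
    volume[z0:z1, y0:y1, x0:x1] = subvol

frag_boxes = [np.array([[0, 0, 0], [2, 4, 4]]),
              np.array([[2, 0, 0], [4, 4, 4]])]
frag_vols = [np.full((2, 4, 4), 1), np.full((2, 4, 4), 2)]

# Final physical box is the min/max of all fragment physical extents.
physical_boxes = np.array(frag_boxes)
final_box = np.array([physical_boxes[:, 0, :].min(axis=0),
                      physical_boxes[:, 1, :].max(axis=0)])

final_volume = np.zeros(final_box[1] - final_box[0], frag_vols[0].dtype)
for box, vol in zip(frag_boxes, frag_vols):
    overwrite_subvol(final_volume, box - final_box[0], vol)

assert (final_volume[:2] == 1).all() and (final_volume[2:] == 2).all()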
Example #26
    def execute(self):
        self._sanitize_config()

        input_config = self.config["input"]
        options = self.config["samplepoints"]
        resource_config = self.config["resource-manager"]

        resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"])
        volume_service = VolumeService.create_from_config(input_config, resource_mgr_client)

        input_csv = options["input-table"]
        with Timer(f"Reading {input_csv}", logger):
            coordinate_table_df = pd.read_csv(input_csv, header=0, dtype=CSV_TYPES)
            points = coordinate_table_df[['z', 'y', 'x']].values

        rescale = options["rescale-points-to-level"]
        if rescale != 0:
            points //= (2**rescale)

        # All points must lie within the input volume
        points_box = [points.min(axis=0), 1+points.max(axis=0)]
        if (box_intersection(points_box, volume_service.bounding_box_zyx) != points_box).any():
            raise RuntimeError("The point list includes points outside of the volume bounding box.")

        with Timer("Sorting points by Brick ID", logger):
            # 'Brick ID' is defined as the brick's corner coordinate, divided by the brick shape
            brick_shape = volume_service.preferred_message_shape
            brick_ids_and_points = np.concatenate( (points // brick_shape, points), axis=1 )
            brick_ids_and_points = lexsort_columns(brick_ids_and_points)

            brick_ids = brick_ids_and_points[:, :3]
            points = brick_ids_and_points[:, 3:]
            
            # Extract the first row of each group to get the set of unique brick IDs
            point_group_spans = groupby_spans_presorted(brick_ids)
            point_group_starts = (start for start, stop in point_group_spans)
            unique_brick_ids = brick_ids[np.fromiter(point_group_starts, np.int32)]

        with Timer("Constructing sparse mask", logger):
            # BrickWall.from_volume_service() supports the ability to initialize a sparse brick set,
            # with only a subset of Bricks (rather than a dense set containing every brick
            # within the volume bounding box).
            # It requires a SparseBlockMask object indicating exactly which Bricks need to be fetched.
            brick_mask_box = np.array([unique_brick_ids.min(axis=0), 1+unique_brick_ids.max(axis=0)])

            brick_mask_shape = (brick_mask_box[1] - brick_mask_box[0])
            brick_mask = np.zeros(brick_mask_shape, bool)
            brick_mask_coords = unique_brick_ids - brick_mask_box[0]
            brick_mask[tuple(brick_mask_coords.transpose())] = True
            sbm = SparseBlockMask(brick_mask, brick_mask_box*brick_shape, brick_shape)

        with Timer("Initializing BrickWall", logger):
            # Aim for 2 GB partitions when loading segmentation
            GB = 2**30
            target_partition_size_voxels = 2 * GB // np.uint64().nbytes
            brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client, target_partition_size_voxels, 0, sbm, lazy=True)
        
        with Timer(f"Grouping {len(points)} points", logger):
            # This is faster than pandas.DataFrame.groupby() for large data
            point_groups = groupby_presorted(points, brick_ids)
            id_and_ptgroups = list(zip(unique_brick_ids, point_groups))
            num_groups = len(id_and_ptgroups)

        with Timer(f"Join {num_groups} point groups with bricks", logger):
            id_and_ptgroups = dask.bag.from_sequence( id_and_ptgroups,
                                                      npartitions=brickwall.bricks.npartitions )

            id_and_ptgroups = id_and_ptgroups.map(lambda i_p: (*i_p[0], i_p[1]))
            id_and_ptgroups_df = id_and_ptgroups.to_dataframe(columns=['z', 'y', 'x', 'pointgroup'])
            
            ids_and_bricks = brickwall.bricks.map(lambda brick: (*(brick.logical_box[0] // brick_shape), brick))
            ids_and_bricks_df = ids_and_bricks.to_dataframe(columns=['z', 'y', 'x', 'brick'])

            def set_brick_id_index(df):
                def set_brick_id(df):
                    df['brick_id'] = encode_coords_to_uint64( df[['z', 'y', 'x']].values.astype(np.int32) )
                    return df
                df['brick_id'] = np.uint64(0)
                df = df.map_partitions(set_brick_id, meta=df)

                # Note: bricks and pointgroups are already sorted by
                # brick scan-order, so brick_id is already sorted.
                # Specifying sorted=True is critical to performance here.
                df = df.set_index('brick_id', sorted=True)
                return df

            # Give them matching indexes
            ids_and_bricks_df = set_brick_id_index(ids_and_bricks_df)
            id_and_ptgroups_df = set_brick_id_index(id_and_ptgroups_df)

            # Join (index-on-index, so it should be fast)
            ptgroup_and_brick_df = id_and_ptgroups_df.merge( ids_and_bricks_df,
                                                             how='left', left_index=True, right_index=True )
            ptgroup_and_brick_df = ptgroup_and_brick_df[['pointgroup', 'brick']]
            ptgroup_and_brick = ptgroup_and_brick_df.to_bag()
            
        # Optionally, persist and force computation before proceeding:
        #ptgroup_and_brick = persist_and_execute(ptgroup_and_brick, "Persisting joined point groups", logger, False)
        #assert ptgroup_and_brick.count().compute() == num_groups == brickwall.num_bricks

        def sample_points(points_and_brick):
            """
            Given an array of points (N,3) and the Brick that contains them,
            sample the label under each point from the brick's volume and return
            a record array containing the points and the sampled labels.
            """
            points, brick = points_and_brick

            result_dtype = [('z', np.int32), ('y', np.int32), ('x', np.int32), ('label', np.uint64)]
            result = np.zeros((len(points),), result_dtype)
            result['z'] = points[:,0]
            result['y'] = points[:,1]
            result['x'] = points[:,2]

            # Make relative to brick offset
            points -= brick.physical_box[0]
            
            result['label'] = brick.volume[tuple(points.transpose())]
            return result

        with Timer("Sampling bricks", logger):
            brick_samples = ptgroup_and_brick.map(sample_points).compute()

        with Timer("Concatenating samples", logger):
            sample_table = np.concatenate(brick_samples)

        with Timer("Sorting samples", logger):
            # This will sort in terms of the SCALED z,y,x coordinates
            sample_table.sort()

        with Timer("Sorting table", logger):
            if rescale == 0:
                coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)
            else:
                # sample_table is sorted by RESCALED coordinate,
                # so sort our table the same way
                coordinate_table_df['rz'] = coordinate_table_df['z'] // (2**rescale)
                coordinate_table_df['ry'] = coordinate_table_df['y'] // (2**rescale)
                coordinate_table_df['rx'] = coordinate_table_df['x'] // (2**rescale)
                coordinate_table_df.sort_values(['rz', 'ry', 'rx'], inplace=True)
                del coordinate_table_df['rz']
                del coordinate_table_df['ry']
                del coordinate_table_df['rx']
                
        # Now that samples and input rows are sorted identically,
        # append the results
        output_col = options["output-column"]
        coordinate_table_df[output_col] = sample_table['label'].copy()

        if rescale != 0:
            with Timer("Re-sorting table at scale 0", logger):
                # For simplicity (API and testing), we guarantee that coordinates are sorted in the output.
                # In the case of rescaled points, they need to be sorted once more (at scale 0 this time)
                coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)

        with Timer("Exporting samples", logger):
            coordinate_table_df.to_csv(options["output-table"], header=True, index=False)

        logger.info("DONE.")
def _crop_body_mask_and_mito_seg(body_mask, mito_seg, mask_box, search_cfg, batch_tbars, primary_point, logger):
    """
    To reduce the size of the analysis volumes during distance computation
    (the most expensive step), we pre-filter out components of the body mask
    that don't actually contain both points of interest and mito.

    If those segments don't even touch the volume edges,
    then any points on those segments can be safely marked 'done'
    if this is the final search config.
    """
    with Timer("Filtering components and cropping", logger):
        body_cc = labelMultiArrayWithBackground((body_mask != 0).view(np.uint8))

        # Keep only components which contain both mito and points
        tbar_points = batch_tbars[[*'zyx']].values // (2 ** search_cfg.analysis_scale)
        is_in_box = (tbar_points >= mask_box[0]).all(axis=1) & (tbar_points < mask_box[1]).all(axis=1)
        tbar_points = tbar_points[is_in_box]
        pts_local = tbar_points - mask_box[0]

        point_cc_df = batch_tbars.iloc[is_in_box][[*'zyx']].copy()
        point_cc_df['cc'] = body_cc[tuple(np.transpose(pts_local))]

        point_ccs = set(point_cc_df['cc'])
        mito_ccs = set(pd.unique(body_cc[mito_seg != 0]))
        keep_ccs = point_ccs & mito_ccs
        keep_mask = mask_for_labels(body_cc, keep_ccs)

        body_mask = np.where(keep_mask, body_mask, 0)
        mito_seg = np.where(keep_mask, mito_seg, 0)
        logger.info(f"Dropped {body_cc.max() - len(keep_ccs)} components, kept {len(keep_ccs)}")

        # Also determine the set of points which should be marked as hopeless,
        # due to a lack of mitos on their components.
        # Hopeless points are those which reside on hopeless components.
        # Hopeless components are any components that fall within the dilation
        # region and still ended up without mitos.
        hopeless_point_ids = []
        if search_cfg.is_final and (point_ccs - mito_ccs):
            with Timer("Identifying hopeless points", logger):
                # Calculate the region that is subject to repairs (dilation),
                # in local coordinates.
                dr = search_cfg.dilation_radius_s0 // (2 ** search_cfg.analysis_scale)
                buf = search_cfg.dilation_exclusion_buffer_s0 // (2 ** search_cfg.analysis_scale)
                buf += max(1, dr)
                R = search_cfg.radius_s0 // (2 ** search_cfg.analysis_scale)
                orig_box = np.array([primary_point - R, primary_point + R + 1])
                inner_box = orig_box + np.array([buf, -buf])[:, None]
                inner_box = box_intersection(mask_box, inner_box)
                inner_box = inner_box - mask_box[0]
                inner_vol = body_cc[box_to_slicing(*inner_box)]
                inner_ccs = set(pd.unique(inner_vol.ravel())) - {0}

                # Overwrite body_cc, we don't need it for anything else after this.
                body_cc[box_to_slicing(*inner_box)] = 0

                outer_ccs = set(body_cc.ravel())
                hopeless_ccs = (inner_ccs - outer_ccs) - mito_ccs  # noqa
                hopeless_point_ids = point_cc_df.query('cc in @hopeless_ccs').index

        # Shrink the volume bounding box to encompass only the
        # non-zero portion of the filtered body mask.
        nz_box = compute_nonzero_box(keep_mask)
        if not nz_box.any():
            return None, None, nz_box, hopeless_point_ids

        body_mask = body_mask[box_to_slicing(*nz_box)]
        mito_seg = mito_seg[box_to_slicing(*nz_box)]
        mask_box = mask_box[0] + nz_box

        return body_mask, mito_seg, mask_box, hopeless_point_ids
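
# Illustrative sketch: the component filter above keeps only connected
# components that contain both a point of interest and some mito. A 1-D
# example, using scipy.ndimage.label() as a stand-in for the
# labelMultiArrayWithBackground() helper assumed by this code:
import numpy as np
from scipy.ndimage import label

body_mask = np.array([0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1], np.uint8)
mito_seg = np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], np.uint64)
point_positions = np.array([1, 6])  # hypothetical tbar locations

body_cc, _ = label(body_mask)

point_ccs = set(body_cc[point_positions])          # components containing points
mito_ccs = set(np.unique(body_cc[mito_seg != 0]))  # components containing mito
keep_ccs = point_ccs & mito_ccs

keep_mask = np.isin(body_cc, list(keep_ccs))
body_mask = np.where(keep_mask, body_mask, 0)
print(body_mask)  # only the first component (which has both) survives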
Example #28
def split_brick(new_grid, original_brick):
    """
    Given a single brick and a new grid to which its data should be redistributed,
    split the brick into pieces, indexed by their NEW grid locations.

    The brick fragments are returned as Bricks themselves, but with relatively
    small volume and physical_box members.

    Note: It is probably a mistake to call this function for Bricks which have
          a larger physical_box than logical_box, so that is currently forbidden.
          (It would work here, but it implies that you will end up with some voxels
          represented multiple times in a given RDD of Bricks, with undefined results
          as to which ones are kept after you consolidate them into a new alignment.)

          However, the reverse is permitted, i.e. it is permitted for the DESTINATION
          grid to use a halo, in which case some pixels in the original brick will be
          duplicated to multiple destinations.

    Returns: [Brick, Brick, ...],
            where each Brick is a fragment (to be assembled later into the new grid's bricks).
    """
    fragments = []

    # Forbid out-of-bounds physical_boxes. (See note above.)
    assert ((original_brick.physical_box[0] >= original_brick.logical_box[0]).all() and
            (original_brick.physical_box[1] <= original_brick.logical_box[1]).all()), \
                f"{original_brick.physical_box[:,::-1].tolist()} extends outside of {original_brick.logical_box[:,::-1].tolist()}"

    ## FIXME:
    ## If the brick lies completely within a single grid square for the destination block,
    ## Then boxes_from_grid() will only return a single box and the brick's volume will remain unchanged.
    ## In that case, it's probably best not to uncompress/recompress the brick.
    ## Just create a new brick with the same compressed data and a different logical_box.

    # Iterate over the new boxes that intersect with the original brick
    for destination_box in boxes_from_grid(original_brick.physical_box,
                                           new_grid,
                                           include_halos=True):
        # Physical intersection of original with new
        split_box = box_intersection(destination_box,
                                     original_brick.physical_box)

        # Extract portion of original volume data that belongs to this new box
        split_box_internal = split_box - original_brick.physical_box[0]
        fragment_vol = extract_subvol(original_brick.volume,
                                      split_box_internal)

        # Subtract out halo to get logical_box
        new_logical_box = destination_box - (-new_grid.halo_shape,
                                             new_grid.halo_shape)

        new_location_id = tuple(new_logical_box[0] // new_grid.block_shape)

        fragment_brick = Brick(new_logical_box,
                               split_box,
                               fragment_vol,
                               location_id=new_location_id,
                               compression=original_brick.compression)
        fragment_brick.compress()

        fragments.append(fragment_brick)

    original_brick.compress()
    return fragments
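
# Illustrative sketch: splitting one physical box across a destination grid is
# just per-square box clipping. A 2-D example (for brevity), with simple
# arithmetic standing in for the boxes_from_grid() and box_intersection()
# helpers assumed by this code:
import numpy as np

physical_box = np.array([[0, 0], [100, 100]])  # brick extent
grid_shape = np.array([64, 64])                # destination grid squares

# Enumerate the grid squares the brick touches, and clip each one to the
# brick (the role box_intersection() plays above).
lo = physical_box[0] // grid_shape
hi = -(-physical_box[1] // grid_shape)  # ceil-divide
for i in range(lo[0], hi[0]):
    for j in range(lo[1], hi[1]):
        dest_box = np.array([[i, j], [i + 1, j + 1]]) * grid_shape
        split_box = np.array([np.maximum(dest_box[0], physical_box[0]),
                              np.minimum(dest_box[1], physical_box[1])])
        print(dest_box.tolist(), '->', split_box.tolist())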