def test_realign_bricks_to_new_grid_WITH_HALO():
    """
    Generate bricks on one grid, realign them to a different grid that
    has a halo, and verify the boxes and data of every resulting brick.
    """
    source_grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    original_bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box,
        source_grid,
        partial(extract_subvol, volume),
        DebugClient(),
    )

    halo = 1
    halo_shape = np.array([1, 1])
    new_grid = Grid((20, 10), (0, 0), halo)

    new_bricks = realign_bricks_to_new_grid(new_grid, original_bricks).compute()
    new_logical_boxes = [b.logical_box for b in new_bricks]

    # from (0,30) -> (100,290)
    assert len(new_bricks) == 5 * 26, f"{len(new_bricks)}"

    for expected_logical_box, brick in zip(new_logical_boxes, new_bricks):
        assert isinstance(brick, Brick), f"Got {type(brick)}"
        assert (brick.logical_box == expected_logical_box).all()

        box_start, box_stop = brick.logical_box[0], brick.logical_box[1]

        # logical_box must be exactly one block
        assert ((box_stop - box_start) == new_grid.block_shape).all()

        # Must be grid-aligned
        assert ((brick.logical_box - new_grid.offset) % new_grid.block_shape == 0).all()

        # Should match logical_box+halo, except for edges
        expected_physical_box = box_intersection(
            brick.logical_box + (-halo_shape, halo_shape), bounding_box)
        assert (brick.physical_box == expected_physical_box).all()

        # Volume shape must match
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # Volume data must match
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
def test_split_brick_WITH_HALO():
    """
    split_brick() is expected to raise an AssertionError when the
    brick's physical box extends beyond its logical box (i.e. has a halo).
    """
    halo = 1
    grid = Grid((10, 20), (12, 3), halo)
    volume = np.random.randint(0, 10, (100, 300))

    # Test with the first brick in the grid
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape

    # Not always true, but happens to be true in this case.
    physical_stop = logical_stop + halo

    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])

    assert (logical_box == [(10, 0), (20, 20)]).all()
    assert (physical_box == [(12, 3), (21, 21)]).all()

    brick_data = extract_subvol(volume, physical_box)
    original_brick = Brick(logical_box, physical_box, brick_data)

    # New grid scheme
    new_grid = Grid((2, 10), (0, 0))

    # Expected failure: Forbidden to split bricks that have a halo
    got_assertion = False
    try:
        _fragments = split_brick(new_grid, original_brick)
    except AssertionError:
        got_assertion = True

    assert got_assertion, "Did not encounter the expected assertion. split_brick() should fail for bricks that have a halo."
def _consolidate_and_pad(self, slab_index, input_wall, scale, output_service, align=True, pad=True):
    """
    Realign the given BrickWall to the output service's preferred
    message grid, then optionally pad it out to full storage blocks.

    Note:
        When realignment is actually performed, the input wall is
        unpersisted.  When padding is performed, the intermediate
        (realigned) wall is unpersisted.

    Args:
        slab_index:
            Used only in log messages.
        input_wall:
            The BrickWall to process.
        scale:
            The pyramid scale of the data.  Forwarded to
            output_service.get_subvolume() when fetching padding data.
        output_service:
            The output_service to align to and pad from
        align:
            If False, skip the alignment step.
            (Only use this if the bricks are already aligned.)
        pad:
            If False, skip the padding step

    Returns a pre-executed and persisted BrickWall.
    """
    writing_grid = Grid(output_service.preferred_message_shape)

    needs_shuffle = align and not writing_grid.equivalent_to(input_wall.grid)
    if needs_shuffle:
        # Consolidate bricks to full-size, aligned blocks (shuffles data)
        realigned_wall = input_wall.realign_to_new_grid(writing_grid)
        realigned_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Shuffling bricks into alignment", logger)

        # Discard original
        input_wall.unpersist()
    else:
        realigned_wall = input_wall
        realigned_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Persisting pre-aligned bricks", logger)

    if not pad:
        return realigned_wall

    # Pad from previously-existing pyramid data until
    # we have full storage blocks, e.g. (64,64,64),
    # but not necessarily full bricks, e.g. (64,64,6400)
    width = output_service.block_width
    padding_grid = Grid((width, width, width), writing_grid.offset)
    fetch_output = partial(output_service.get_subvolume, scale=scale)

    padded_wall = realigned_wall.fill_missing(fetch_output, padding_grid)
    padded_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Padding", logger)

    # Discard old
    realigned_wall.unpersist()
    return padded_wall
def _partition_input(self):
    """
    Map the input segmentation volume from DVID into partitioned bricks,
    using the config's bounding-box setting for the full volume region,
    and the input 'message-block-shape' as the brick grid spacing.

    Returns:
        (bricks, input_bb_zyx, input_grid)
        where:
            - bricks is whatever sparkdvid.parallelize_bounding_box()
              returns (presumably an RDD of (volumePartition, data)
              -- TODO confirm against sparkdvid)
            - input_bb_zyx is an array [start_zyx, stop_zyx]
            - input_grid is the Grid (offset (0,0,0)) that was used
              to partition the volume
    """
    input_config = self.config_data["input"]

    # The config stores shapes and boxes in XYZ order; reverse to ZYX.
    # repartition to be z=blksize, y=blksize, x=runlength
    brick_shape_zyx = input_config["message-block-shape"][::-1]
    input_grid = Grid(brick_shape_zyx, (0,0,0))

    input_bb_zyx = np.array(input_config["bounding-box"])[:,::-1]

    # Aim for 2 GB RDD partitions
    GB = 2**30
    target_partition_size_voxels = 2 * GB // np.uint64().nbytes

    sparkdvid_input_context = sparkdvid(self.sc, input_config["server"], input_config["uuid"], self)
    bricks = sparkdvid_input_context.parallelize_bounding_box(
        input_config["segmentation-name"],
        input_bb_zyx,
        input_grid,
        target_partition_size_voxels )

    return bricks, input_bb_zyx, input_grid
def test_sparse_boxes_WITH_OFFSET():
    """
    Check SparseBlockMask.sparse_boxes() for a mask whose box does not
    start at the origin, in both logical-box and physical-box modes.
    """
    mask = np.zeros((5, 6, 7), dtype=bool)

    # since mask offset is 20, this spans 3 bricks (physical: 20-70, logical: 0-90)
    mask[0, 0, 0:5] = True

    # spans a single brick (physical: 30-60, logical: 30-60)
    mask[0, 1, 1:4] = True

    resolution = 10

    # MASK STARTS AT OFFSET
    box_start = np.array([0, 10, 20])
    box_stop = box_start + 10 * np.array(mask.shape)
    mask_box = (box_start, box_stop)

    brick_grid = Grid((10, 10, 30), (0, 0, 0))
    sparse_block_mask = SparseBlockMask(mask, mask_box, resolution)

    expected_logical = [
        [[0, 10, 0], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 60]],
        [[0, 10, 60], [10, 20, 90]],
        [[0, 20, 30], [10, 30, 60]],
    ]
    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=True)
    assert (logical_boxes == expected_logical).all()

    expected_physical = [
        [[0, 10, 20], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 60]],
        [[0, 10, 60], [10, 20, 70]],
        [[0, 20, 30], [10, 30, 60]],
    ]
    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=False)
    assert (physical_boxes == expected_physical).all()
def test_sparse_boxes_NO_OFFSET():
    """
    Check SparseBlockMask.sparse_boxes() for a mask whose box starts at
    the origin, in both logical-box and physical-box modes.
    """
    mask = np.zeros((5, 6, 7), dtype=bool)
    mask[0, 0, 0:5] = True
    mask[0, 1, 1:4] = True

    resolution = 10

    # MASK STARTS AT ORIGIN (NO OFFSET)
    box_start = np.array([0, 0, 0])
    box_stop = box_start + 10 * np.array(mask.shape)
    mask_box = (box_start, box_stop)

    brick_grid = Grid((10, 10, 30))
    sparse_block_mask = SparseBlockMask(mask, mask_box, resolution)

    expected_logical = [
        [[0, 0, 0], [10, 10, 30]],
        [[0, 0, 30], [10, 10, 60]],
        [[0, 10, 0], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 60]],
    ]
    logical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=True)
    assert (logical_boxes == expected_logical).all()

    expected_physical = [
        [[0, 0, 0], [10, 10, 30]],
        [[0, 0, 30], [10, 10, 50]],
        [[0, 10, 10], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 40]],
    ]
    physical_boxes = sparse_block_mask.sparse_boxes(brick_grid, return_logical_boxes=False)
    assert (physical_boxes == expected_physical).all()
def test_generate_bricks_WITH_HALO():
    """
    Generate bricks from an in-memory volume using a grid with a halo,
    and verify the boxes and data of every brick.
    """
    halo = 1
    halo_shape = np.array([1, 1])
    grid = Grid((10, 20), (12, 3), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    lazy_bricks, num_bricks = generate_bricks_from_volume_source(
        bounding_box,
        grid,
        partial(extract_subvol, volume),
        DebugClient(),
    )
    bricks = lazy_bricks.compute()

    expected_count = 9 * 14
    assert len(bricks) == expected_count
    assert expected_count == num_bricks

    for brick in bricks:
        assert isinstance(brick, Brick)
        assert brick.logical_box.shape == (2, 2)
        assert brick.physical_box.shape == (2, 2)

        # logical_box must be exactly one block
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == grid.block_shape).all()

        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()

        # Physical == logical+halo, except for bounding-box edges
        expected_physical_box = box_intersection(
            brick.logical_box + (-halo_shape, halo_shape), bounding_box)
        assert (brick.physical_box == expected_physical_box).all()

        # Volume shape must match
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # Volume data must match
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()
def test_generate_bricks():
    """
    Generate bricks from an in-memory volume (no halo) and verify the
    boxes, data, and reported size of every brick.
    """
    grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    lazy_bricks, num_bricks = generate_bricks_from_volume_source(
        bounding_box,
        grid,
        partial(extract_subvol, volume),
        DebugClient(),
    )
    bricks = lazy_bricks.compute()

    expected_count = 9 * 14
    assert len(bricks) == expected_count
    assert expected_count == num_bricks

    for brick in bricks:
        assert isinstance(brick, Brick)
        assert brick.logical_box.shape == (2, 2)
        assert brick.physical_box.shape == (2, 2)

        # logical_box must be exactly one block
        logical_shape = brick.logical_box[1] - brick.logical_box[0]
        assert (logical_shape == grid.block_shape).all()

        # Must be grid-aligned
        assert ((brick.logical_box - grid.offset) % grid.block_shape == 0).all()

        # Must not exceed bounding box
        clipped_box = box_intersection(brick.logical_box, bounding_box)
        assert (brick.physical_box == clipped_box).all()

        # Volume shape must match
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()

        # Volume data must match
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()

        # __sizeof__ must include the volume
        assert sys.getsizeof(brick) > sys.getsizeof(brick.volume)
def translate(self, offset_zyx):
    """
    Return a new BrickWall in which every brick's logical and physical
    boxes have been shifted by the given offset.

    The brick volume data is passed through unchanged.  The wall's
    bounding box (if it has one) and its grid offset are shifted by
    the same amount.
    """
    shifted_grid = Grid(self.grid.block_shape, self.grid.offset + offset_zyx)

    if self.bounding_box is None:
        shifted_bounding_box = None
    else:
        shifted_bounding_box = self.bounding_box + offset_zyx

    def translate_brick(brick):
        # FIXME: This is needlessly inefficient for compressed bricks,
        #        since it uncompresses and recompresses the volume,
        #        but currently the Brick constructor doesn't allow me to
        #        provide the compressed form directly.
        #
        # NOTE(review): location_id is computed from the brick's
        #               ORIGINAL (untranslated) logical box -- confirm
        #               whether that is intended.
        location_id = tuple(brick.logical_box[0] // shifted_grid.block_shape)
        return Brick(
            brick.logical_box + offset_zyx,
            brick.physical_box + offset_zyx,
            brick.volume,
            location_id=location_id,
            compression=brick.compression,
        )

    shifted_bricks = self.bricks.map(translate_brick)
    return BrickWall(shifted_bounding_box, shifted_grid, shifted_bricks, self.num_bricks)
def _hotknife_destripe(self, bricked_slab_wall, slab_index):
    """
    Apply destripe() to every Z-slice of the given slab.

    The slab is realigned into bricks that are one voxel thick in Z and
    span the output service's full YX extent, destripe() is applied to
    each slice using the configured "hotknife-seams", and the adjusted
    slices are returned as a new, persisted BrickWall.

    Args:
        bricked_slab_wall:
            BrickWall for the slab.  Its bounding box must start at
            YX == (0,0).
        slab_index:
            Used only in log messages.

    Returns:
        A persisted BrickWall of the destriped slices.

    Raises:
        AssertionError if the configured slab axis is not 'z' or the
        bounding box does not start at YX == (0,0).
    """
    options = self.config["copygrayscale"]
    assert options["slab-axis"] == 'z', \
        "To use hotknife-destripe, processing slabs must be cut across the Z axis"

    # Check this precondition BEFORE the expensive realign/persist below,
    # so a bad bounding box fails fast instead of after shuffling data.
    #
    # This assertion could be lifted if we adjust seams as needed before calling destripe(),
    # but for now I have no use-case for volumes that don't start at (0,0)
    assert (bricked_slab_wall.bounding_box[0, 1:] == (0,0)).all(), \
        "Input bounding box must start at YX == (0,0)"

    wall_shape = self.output_service.bounding_box_zyx[1] - self.output_service.bounding_box_zyx[0]
    z_slice_shape = (1,) + (*wall_shape[1:],)
    z_slice_grid = Grid( z_slice_shape )

    z_slice_slab = bricked_slab_wall.realign_to_new_grid( z_slice_grid )
    z_slice_slab.persist_and_execute(f"Slab {slab_index}: Constructing slices of shape {z_slice_shape}", logger)

    seams = options["hotknife-seams"]
    def destripe_brick(brick):
        assert brick.volume.shape[0] == 1
        adjusted_slice = destripe(brick.volume[0], seams)
        # Restore the singleton Z axis that was dropped for destripe()
        return Brick(brick.logical_box, brick.physical_box, adjusted_slice[None], location_id=brick.location_id)

    adjusted_bricks = z_slice_slab.bricks.map(destripe_brick)

    # NOTE(review): The adjusted bricks are laid out on z_slice_grid, but the
    #               wall is constructed with the ORIGINAL wall's grid.
    #               Confirm that callers realign before relying on wall.grid.
    adjusted_wall = BrickWall( bricked_slab_wall.bounding_box,
                               bricked_slab_wall.grid,
                               adjusted_bricks )

    adjusted_wall.persist_and_execute(f"Slab {slab_index}: Destriping slices", logger)
    return adjusted_wall
def main():
    """
    Find all bodies that have supervoxels within a hard-coded region of
    the production segmentation, and run _repair_index for each of them.
    """
    # Hard-coded parameters
    prod = 'emdata4:8900'
    master_seg = (prod, find_master(prod), 'segmentation')

    # I accidentally corrupted the labelindex of bodies in this region
    patch_box = 20480 + np.array([[0, 0, 0], [1024, 1024, 1024]])

    parallel_opts = dict(processes=32, ordered=False, leave_progress=True)

    with Timer("Fetching supervoxels", logger):
        boxes = boxes_from_grid(patch_box, Grid((64, 64, 6400)), clipped=True)
        sv_sets = compute_parallel(partial(_fetch_svs, master_seg), boxes, **parallel_opts)

        # Label 0 is excluded from the supervoxel set.
        svs = set(chain.from_iterable(sv_sets))
        svs.discard(0)

        bodies = set(fetch_mapping(*master_seg, svs))

    with Timer(f"Repairing {len(bodies)} labelindexes", logger):
        compute_parallel(partial(_repair_index, master_seg), bodies, **parallel_opts)

    print("DONE.")
def test_boxes_from_grid_0():
    """
    Simple case: bounding_box starts at zero, and the grid has no offset.
    """
    grid = Grid((10, 20), (0, 0))
    bounding_box = [(0, 0), (100, 300)]

    boxes = np.array(list(boxes_from_grid(bounding_box, grid)))

    # One box per grid block within the bounding box
    blocks_per_axis = np.array(bounding_box[1]) / grid.block_shape
    assert boxes.shape == (np.prod(blocks_per_axis), 2, 2)

    # Every box corner is grid-aligned, and every box is one block in size
    assert (boxes % grid.block_shape == 0).all()
    box_starts = boxes[:, 0, :]
    box_stops = boxes[:, 1, :]
    assert (box_stops - box_starts == grid.block_shape).all()
def test_pad_brick_data_from_volume_source_NO_PADDING_NEEDED():
    """
    If the brick's physical box is already aligned to the padding grid,
    the very same brick object should be returned.
    """
    source_volume = np.random.randint(0, 10, (100, 300))

    logical_box = [(1, 0), (11, 20)]
    physical_box = [(6, 10), (11, 15)]
    brick_data = extract_subvol(source_volume, physical_box)
    brick = Brick(logical_box, physical_box, brick_data)

    padding_grid = Grid((5, 5), offset=(1, 0))
    read_source = partial(extract_subvol, source_volume)
    padded_brick = pad_brick_data_from_volume_source(padding_grid, read_source, brick)

    assert padded_brick is brick, "Expected to get the same brick back."
def test_pad_brick_data_from_volume_source():
    """
    A brick whose physical box is not aligned to the padding grid should
    have its physical box expanded and its data filled from the source,
    while its logical box stays the same.
    """
    source_volume = np.random.randint(0, 10, (100, 300))

    logical_box = [(1, 0), (11, 20)]
    physical_box = [(3, 8), (7, 13)]
    brick_data = extract_subvol(source_volume, physical_box)
    brick = Brick(logical_box, physical_box, brick_data)

    padding_grid = Grid((5, 5), offset=(1, 0))
    read_source = partial(extract_subvol, source_volume)
    padded_brick = pad_brick_data_from_volume_source(padding_grid, read_source, brick)

    assert (padded_brick.logical_box == brick.logical_box).all()
    assert (padded_brick.physical_box == [(1, 5), (11, 15)]).all()

    expected_data = extract_subvol(source_volume, padded_brick.physical_box)
    assert (padded_brick.volume == expected_data).all()
def test_realign_bricks_to_same_grid():
    """
    Realigning lazy bricks to the grid they were generated with should
    not touch their volume data, whereas realigning to a different grid
    must read it.
    """
    grid = Grid((10, 20), (12, 3))
    bounding_box = np.array([(15, 30), (95, 290)])

    def assert_if_called(box):
        assert False, ("Shouldn't get here, since the bricks were generated with lazy=True "
                       "and realignment shouldn't have attempted to split any bricks.")

    original_bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, assert_if_called, DebugClient(), lazy=True)

    same_grid_bricks = realign_bricks_to_new_grid(grid, original_bricks)

    import dask.bag
    assert isinstance(same_grid_bricks, dask.bag.Bag)

    # If we attempt to realign to a different grid,
    # we'll get an assertion because it will have to call
    # the volume accessor (assert_if_called), above.
    different_grid = Grid((20, 10))
    with pytest.raises(AssertionError):
        realign_bricks_to_new_grid(different_grid, original_bricks).compute()
def test_boxes_from_grid_1():
    """
    Use a bounding box that is not aligned to the grid.
    """
    grid = Grid((10, 20), (0, 0))
    bounding_box = np.array([(15, 30), (95, 290)])

    # Round the start down and the stop up to the nearest block boundary
    aligned_start = bounding_box[0] // grid.block_shape * grid.block_shape
    aligned_stop = (bounding_box[1] + grid.block_shape - 1) // grid.block_shape * grid.block_shape
    aligned_bb_shape = aligned_stop - aligned_start

    boxes = np.array(list(boxes_from_grid(bounding_box, grid)))

    blocks_per_axis = aligned_bb_shape / grid.block_shape
    assert boxes.shape == (np.prod(blocks_per_axis), 2, 2)

    # Every box corner is grid-aligned, and every box is one block in size
    assert (boxes % grid.block_shape == 0).all()
    box_starts = boxes[:, 0, :]
    box_stops = boxes[:, 1, :]
    assert (box_stops - box_starts == grid.block_shape).all()
def test_split_brick():
    """
    Split one brick (without halo) according to a finer grid, and verify
    the logical boxes and data of the resulting fragments.
    """
    grid = Grid((10, 20), (12, 3))
    volume = np.random.randint(0, 10, (100, 300))

    # Test with the first brick in the grid
    physical_start = np.array(grid.offset)
    logical_start = physical_start // grid.block_shape * grid.block_shape
    logical_stop = logical_start + grid.block_shape

    # Not always true, but happens to be true in this case.
    physical_stop = logical_stop

    logical_box = np.array([logical_start, logical_stop])
    physical_box = np.array([physical_start, physical_stop])

    assert (logical_box == [(10, 0), (20, 20)]).all()
    assert (physical_box == [(12, 3), (20, 20)]).all()

    brick_data = extract_subvol(volume, physical_box)
    original_brick = Brick(logical_box, physical_box, brick_data)

    # New grid scheme
    new_grid = Grid((2, 10), (0, 0))
    fragments = split_brick(new_grid, original_brick)

    expected_boxes = [
        # ((10, 0), (14, 10)),  # <--- Not present. These new boxes intersect with the original logical_box,
        # ((10, 10), (14, 20)), # <--- but there is no physical data for them in the original brick.
        ((12, 0), (14, 10)),
        ((12, 10), (14, 20)),
        ((14, 0), (16, 10)),
        ((14, 10), (16, 20)),
        ((16, 0), (18, 10)),
        ((16, 10), (18, 20)),
        ((18, 0), (20, 10)),
        ((18, 10), (20, 20)),
    ]
    fragment_boxes = [box_as_tuple(frag.logical_box) for frag in fragments]
    assert fragment_boxes == expected_boxes

    for frag in fragments:
        expected_data = extract_subvol(volume, frag.physical_box)
        assert (frag.volume == expected_data).all()
def test_boxes_from_grid_2():
    """
    Use a grid that has a non-zero offset.
    """
    grid = Grid((10, 20), (2, 3))
    bounding_box = np.array([(5, 10), (95, 290)])

    # NOTE(review): This rounding ignores grid.offset.  For these particular
    #               values it appears to yield the same block count as
    #               offset-aware rounding would -- confirm before reusing.
    aligned_start = bounding_box[0] // grid.block_shape * grid.block_shape
    aligned_stop = (bounding_box[1] + grid.block_shape - 1) // grid.block_shape * grid.block_shape
    aligned_bb_shape = aligned_stop - aligned_start

    boxes = np.array(list(boxes_from_grid(bounding_box, grid)))

    blocks_per_axis = aligned_bb_shape / grid.block_shape
    assert boxes.shape == (np.prod(blocks_per_axis), 2, 2)

    # Boxes should be offset by grid.offset.
    assert ((boxes - grid.offset) % grid.block_shape == 0).all()

    box_starts = boxes[:, 0, :]
    box_stops = boxes[:, 1, :]
    assert (box_stops - box_starts == grid.block_shape).all()
def downsample(self, block_shape, method):
    """
    Return a new BrickWall in which every brick has been downsampled.

    See util.downsample for available methods

    Note:
        If the downsampling factor does not evenly divide a brick's
        physical_box start or stop, the brick's data is first clipped
        inward to the nearest multiple of the factor, so voxels on
        the edge of the volume are discarded before downsampling.

    Args:
        block_shape:
            The downsampling block shape.  Must be isotropic;
            only block_shape[0] is used as the factor.
        method:
            Passed through to the module-level downsample() function.
    """
    assert block_shape[0] == block_shape[1] == block_shape[2], \
        "Currently, downsampling must be isotropic"
    factor = block_shape[0]

    def downsample_brick(brick):
        assert (brick.logical_box % factor == 0).all()

        physical_is_aligned = not (brick.physical_box % factor != 0).any()
        if physical_is_aligned:
            source_box = brick.physical_box
            source_vol = brick.volume
        else:
            # If the factor doesn't perfectly divide into
            # the brick's physical dimensions,
            # then chop off the edges until it does.
            source_box = round_box(brick.physical_box, factor, 'in')
            source_vol = extract_subvol(brick.volume, source_box - brick.physical_box[0])

        # Note: Here 'downsample' is the module-level function,
        #       not this method.
        small_vol = downsample(source_vol, factor, method)
        return Brick(
            brick.logical_box // factor,
            source_box // factor,
            small_vol,
            compression=brick.compression,
        )

    if self.bounding_box is None:
        small_bounding_box = None
    else:
        small_bounding_box = self.bounding_box // factor

    small_grid = Grid(self.grid.block_shape // factor, self.grid.offset // factor)
    small_bricks = self.bricks.map(downsample_brick)
    return BrickWall(small_bounding_box, small_grid, small_bricks, self.num_bricks)
def init_brickwall(self):
    """
    Construct the BrickWall of input data for this workflow.

    Creates the input and mask-input volume services from the config,
    builds a sparse block mask for the configured "mask-labels" from the
    mask service, loads the input bricks selected by that mask, and pads
    them (from the input service) to the input's storage block grid.

    Returns:
        BrickWall

    Raises:
        AssertionError if the two services don't use the same brick
        shape, or if a brick shape isn't a multiple of its service's
        block-width.
        RuntimeError if no mask labels were given, or if the sparse
        block mask for them is empty along any axis.
    """
    input_config = self.config["input"]
    mask_input_config = self.config["mask-input"]
    mgr_config = self.config["resource-manager"]
    options = self.config["sparseblockstats"]

    resource_mgr_client = ResourceManagerClient( mgr_config["server"], mgr_config["port"] )
    input_service = VolumeService.create_from_config( input_config, resource_mgr_client )
    mask_service = VolumeService.create_from_config( mask_input_config, resource_mgr_client )

    assert (input_service.preferred_message_shape == mask_service.preferred_message_shape).all(), \
        "This workflow assumes that the input and the mask-input use the same brick grid."

    assert not (input_service.preferred_message_shape % input_service.block_width).any(), \
        "input brick grid spacing must be a multiple of the input's block-width"
    assert not (mask_service.preferred_message_shape % mask_service.block_width).any(), \
        "mask brick grid spacing must be a multiple of the mask-input's block-width"

    # Only a DVID-backed mask service exposes a 'supervoxels' setting here;
    # for any other service type the labels are loaded with is_supervoxels=False.
    is_supervoxels = False
    if isinstance(mask_service.base_service, DvidVolumeService):
        is_supervoxels = mask_service.base_service.supervoxels

    # Load body list and eliminate duplicates
    subset_labels = load_body_list(options["mask-labels"], is_supervoxels)
    subset_labels = set(subset_labels)

    if not subset_labels:
        raise RuntimeError("You didn't specify any mask subset labels. "
                           "If you want to compute block stats for an entire segmentation volume, use the CopySegmentation workflow.")

    sbm = mask_service.sparse_block_mask_for_labels(subset_labels)

    # A mask box with zero extent along any axis means nothing was found.
    if ((sbm.box[1] - sbm.box[0]) == 0).any():
        raise RuntimeError("Could not find sparse masks for any of the mask-labels")

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes
        brickwall = BrickWall.from_volume_service(input_service, 0, None, self.client, target_partition_size_voxels, 0, sbm)

        # Pad if necessary to ensure that all fetched bricks are block-aligned
        block_shape = 3*(input_service.block_width,)
        brickwall = brickwall.fill_missing(input_service.get_subvolume, Grid(block_shape))

    return brickwall
def test_extract_halos_subsets():
    """
    For both 'outer' and 'inner' halos, extracting 'all' halos should
    give the same boxes and data as combining 'lower' and 'upper'.
    """
    halo = 1
    grid = Grid((10, 20), (0, 0), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, partial(extract_subvol, volume), DebugClient())

    box_cols = ['y0', 'x0', 'y1', 'x1']

    def bricks_to_df(bricks):
        # One row per brick: the flattened physical box, plus the volume itself.
        rows = [[*brick.physical_box.flat, brick.volume] for brick in bricks]
        df = pd.DataFrame(rows, columns=['y0', 'x0', 'y1', 'x1', 'brickvol'])
        return df.sort_values(box_cols).reset_index(drop=True)

    def check(all_halos, lower_halos, upper_halos):
        all_df = bricks_to_df(all_halos)
        lower_df = bricks_to_df(lower_halos)
        upper_df = bricks_to_df(upper_halos)

        combined_df = pd.concat([lower_df, upper_df], ignore_index=True)
        combined_df = combined_df.sort_values(box_cols)
        combined_df.reset_index(drop=True, inplace=True)

        assert (all_df[box_cols] == combined_df[box_cols]).all().all()

        vol_pairs = zip(all_df['brickvol'].values, combined_df['brickvol'].values)
        for all_vol, combined_vol in vol_pairs:
            assert (all_vol == combined_vol).all()

    # Check that 'all' is the same as combining 'lower' and 'upper'
    for halo_type in ('outer', 'inner'):
        all_halos = extract_halos(bricks, grid, halo_type, 'all').compute()
        lower_halos = extract_halos(bricks, grid, halo_type, 'lower').compute()
        upper_halos = extract_halos(bricks, grid, halo_type, 'upper').compute()
        check(all_halos, lower_halos, upper_halos)
def test_compression():
    """
    For every compression method, verify that bricks can be compressed,
    pickled, and then read back with their original data.
    """
    vol_box = [(0, 0, 0), (100, 100, 120)]
    volume = np.random.randint(10, size=vol_box[1], dtype=np.uint64)

    def read_box(box):
        return extract_subvol(volume, box)

    def check_pickle(brick):
        pickle.dumps(brick)

    def check_brick(brick):
        physical_shape = brick.physical_box[1] - brick.physical_box[0]
        assert (brick.volume.shape == physical_shape).all()
        assert (brick.volume == extract_subvol(volume, brick.physical_box)).all()

    for method in COMPRESSION_METHODS:
        wall = BrickWall.from_accessor_func(
            vol_box, Grid((64, 64, 128)), read_box, compression=method)

        # Compress them all
        wall.bricks.map(Brick.compress).compute()

        # Pickle them all
        wall.bricks.map(check_pickle).compute()

        # Check them all (implicit decompression)
        wall.bricks.map(check_brick).compute()
def timed_fetch_blocks_from_box(box):
    """
    Request the blocks covering the given box via DVID's 'specificblocks'
    endpoint, and report how long the request took.

    The response content is neither decompressed nor assembled into
    a volume; only its length is used.

    Note:
        Relies on block_shape, server, uuid, instance, and
        resource_mgr_client from the enclosing scope.

    Returns:
        (timestamp, voxel_count, response_size_in_bytes, seconds)
    """
    assert not (box % block_shape).any(), "For this test, all requests must be block-aligned"

    # Convert each block's ZYX start corner into XYZ block coordinates.
    block_starts_zyx = np.array(list(boxes_from_grid(box, Grid(block_shape))))[:, 0, :]
    block_coords_xyz = block_starts_zyx[:, ::-1] // block_shape
    block_coords_str = ','.join(str(coord) for coord in block_coords_xyz.flat)

    voxel_count = np.prod(box[1] - box[0])

    session = default_dvid_session()
    url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'

    with resource_mgr_client.access_context(server, True, 1, voxel_count):
        timestamp = datetime.now()
        with Timer() as timer:
            r = session.get(url)
        r.raise_for_status()

    return timestamp, voxel_count, len(r.content), timer.seconds
def _consolidate_and_pad(self, slab_index, input_wall, scale, output_service):
    """
    Realign the given BrickWall to the output's writing grid, optionally
    combine it with existing output data according to the configured
    mask labels, and pad it to full storage blocks.

    Args:
        slab_index:
            Used only in log messages.
        input_wall:
            The BrickWall to process.
        scale:
            The pyramid scale of the data.
        output_service:
            The output_service to align to and pad from

    Returns a pre-executed and persisted BrickWall.
    """
    options = self.config["copysegmentation"]

    # We'll pad from previously-existing pyramid data until
    # we have full storage blocks, e.g. (64,64,64),
    # but not necessarily full bricks, e.g. (64,64,6400)
    writing_grid = Grid(output_service.preferred_message_shape)
    padding_grid = Grid(3 * (output_service.block_width,), writing_grid.offset)
    fetch_output = partial(output_service.get_subvolume, scale=scale)

    with Timer(f"Slab {slab_index}: Scale {scale}: Shuffling bricks into alignment", logger):
        # Consolidate bricks to full-size, aligned blocks (shuffles data)
        realigned_wall = input_wall.realign_to_new_grid(writing_grid, fetch_output)
        del input_wall
        realigned_wall.persist_and_execute()

    input_mask_labels = self.input_mask_labels
    output_mask_labels = self.output_mask_labels

    # If no masks are involved, we merely need to pad the existing data on the edges.
    # (No need to fetch the entire output.)
    # Similarly, if scale > 0, then the masks were already applied and the input/output data was
    # already combined, we can simply write the (padded) downsampled data.
    if scale == 0 and (input_mask_labels or output_mask_labels) and not options["skip-masking-step"]:
        # If masks are involved, we must fetch ALL of the output
        # (unless skip-masking-step was given),
        # and select data from input or output according to the masks.

        # Copy these into locals so the closure below
        # references them instead of 'self'.
        masking_service = self.output_service
        translation_offset_zyx = self.translation_offset_zyx

        def combine_with_output(input_brick):
            output_box = input_brick.physical_box + translation_offset_zyx
            output_vol = masking_service.get_subvolume(output_box, scale=0)
            output_vol = np.asarray(output_vol, order='C')

            mask = None
            if input_mask_labels:
                mask = mask_for_labels(input_brick.volume, input_mask_labels)

            if output_mask_labels:
                output_mask = mask_for_labels(output_vol, output_mask_labels)
                if mask is None:
                    mask = output_mask
                else:
                    mask[:] &= output_mask

            # Start with the complete output, then
            # change voxels that fall within both masks.
            output_vol[mask] = input_brick.volume[mask]

            input_brick.compress()
            return output_vol

        combined_wall = realigned_wall.map_brick_volumes(combine_with_output)
        combined_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Combining masked bricks", logger)
        realigned_wall = combined_wall

    padded_wall = realigned_wall.fill_missing(fetch_output, padding_grid)
    del realigned_wall
    padded_wall.persist_and_execute(f"Slab {slab_index}: Scale {scale}: Padding", logger)
    return padded_wall
def _process_slab(self, scale, slab_fullres_box_zyx, slab_index, num_slabs, upscale_slab_wall, min_scale):
    """
    Load (or downsample) one slab at the given scale, realign it to
    the output grid, optionally pad it, and write it to the output.

    Args:
        scale:
            The pyramid scale to produce.
        slab_fullres_box_zyx:
            The slab's box, in full-resolution (scale 0) coordinates.
        slab_index:
            Used only in log messages.
        num_slabs:
            Not used within this function.
        upscale_slab_wall:
            The BrickWall from the previous scale.  Only used when the
            data is downsampled rather than copied from the input.
        min_scale:
            Scales below this are not written.

    Returns:
        The output-aligned BrickWall for this scale, or None if the
        scale was skipped entirely (scale < min_scale with
        pyramid-source "copy").
    """
    options = self.config["copygrayscale"]
    pyramid_source = options["pyramid-source"]
    downsample_method = options["downsample-method"]
    output_service = self.output_service

    if scale < min_scale and pyramid_source == "copy":
        logger.info(f"Slab {slab_index}: Skipping scale {scale}")
        return

    # The slab box is given at full resolution, so scale the voxel count down.
    slab_voxels = np.prod(slab_fullres_box_zyx[1] - slab_fullres_box_zyx[0]) // (2**scale)**3
    voxels_per_thread = slab_voxels // self.total_cores()
    partition_voxels = voxels_per_thread // 2

    # Use the module's logger (not the root logger via logging.info()),
    # consistent with every other message in this function.
    logger.info(f"Slab {slab_index}: Aiming for partitions of {partition_voxels} voxels")

    if pyramid_source == "copy" or scale == 0:
        # Copy from input source
        bricked_slab_wall = BrickWall.from_volume_service(self.input_service, scale, slab_fullres_box_zyx, self.client, partition_voxels)
        bricked_slab_wall.persist_and_execute(f"Slab {slab_index}: Downloading scale {scale}", logger)
    else:
        # Downsample from previous scale
        bricked_slab_wall = upscale_slab_wall.downsample( (2,2,2), downsample_method )
        bricked_slab_wall.persist_and_execute(f"Slab {slab_index}: Downsampling to scale {scale}", logger)
        del upscale_slab_wall

    if scale == 0:
        bricked_slab_wall = self.adjust_contrast(bricked_slab_wall, slab_index)

    # Remap to output bricks
    with Timer(f"Slab {slab_index}: Realigning to output grid", logger):
        output_grid = Grid(output_service.preferred_message_shape)
        output_slab_wall = bricked_slab_wall.realign_to_new_grid( output_grid )

    if options["fill-blocks"]:
        # Pad from previously-existing pyramid data until
        # we have full storage blocks, e.g. (64,64,64),
        # but not necessarily full bricks, e.g. (64,64,6400)
        output_accessor_func = partial(output_service.get_subvolume, scale=scale)

        # But don't bother fetching real data for scale 0
        # the input slabs are already block-aligned, and the edges of each slice will be zeros anyway.
        if scale == 0:
            output_accessor_func = lambda _box: 0

        if isinstance( output_service.base_service, DvidVolumeService):
            # For DVID, we use minimum padding (just pad up to the
            # nearest block boundary, not the whole brick boundary).
            padding_grid = Grid( 3*(output_service.block_width,), output_grid.offset )
        else:
            padding_grid = output_slab_wall.grid

        output_slab_wall = output_slab_wall.fill_missing(output_accessor_func, padding_grid)

    output_slab_wall.persist_and_execute(f"Slab {slab_index}: Assembling scale {scale} bricks", logger)

    # Discard original bricks
    del bricked_slab_wall

    if scale < min_scale:
        logger.info(f"Slab {slab_index}: Not writing scale {scale}")
        return output_slab_wall

    def _write(brick):
        write_brick(output_service, scale, brick)

    with Timer(f"Slab {slab_index}: Writing scale {scale}"):
        output_slab_wall.bricks.map(_write).compute()

    return output_slab_wall
def execute(self):
    """
    Export the input volume as a stack of single-Z slices.

    The input bounding box is processed in Z-slabs of
    options["slices-per-slab"] slices.  Each slab is downloaded as
    bricks, realigned into slice-shaped bricks, and each slice is
    written via the output service.
    """
    self._init_services()
    self._sanitize_config()

    options = self.config_data["options"]
    output_service = self.output_service
    logger.info(f"Output bounding box: {output_service.bounding_box_zyx[:,::-1]}")

    # Data is processed in Z-slabs
    input_bb_zyx = self.input_service.bounding_box_zyx
    _, slice_start_y, slice_start_x = input_bb_zyx[0]

    slab_shape_zyx = input_bb_zyx[1] - input_bb_zyx[0]
    slab_shape_zyx[0] = options["slices-per-slab"]

    slice_shape_zyx = slab_shape_zyx.copy()
    slice_shape_zyx[0] = 1

    # This grid outlines the slabs -- each grid box is a full slab
    slab_grid = Grid(slab_shape_zyx, (0, slice_start_y, slice_start_x))
    slab_boxes = list(clipped_boxes_from_grid(input_bb_zyx, slab_grid))

    def write_slice(brick):
        assert (brick.physical_box == brick.logical_box).all()
        output_service.write_subvolume(brick.volume, brick.physical_box[0])

    for slab_index, slab_box_zyx in enumerate(slab_boxes):
        # Construct BrickWall from input bricks
        thread_count = num_worker_nodes() * cpus_per_worker()
        voxels_in_slab = np.prod(slab_box_zyx[1] - slab_box_zyx[0])
        target_partition_voxels = (voxels_in_slab / thread_count) / 2

        bricked_slab_wall = BrickWall.from_volume_service(
            self.input_service, 0, slab_box_zyx, self.sc, target_partition_voxels)

        # Force download
        bricked_slab_wall.persist_and_execute(f"Downloading slab {slab_index}/{len(slab_boxes)}: {slab_box_zyx[:,::-1]}", logger)

        # Remap to slice-sized "bricks"
        slice_grid = Grid(slice_shape_zyx, offset=slab_box_zyx[0])
        sliced_slab_wall = bricked_slab_wall.realign_to_new_grid(slice_grid)
        sliced_slab_wall.persist_and_execute(f"Assembling slab {slab_index}/{len(slab_boxes)} slices", logger)

        # Discard original bricks
        bricked_slab_wall.unpersist()
        del bricked_slab_wall

        # Export to PNG or TIFF, etc. (automatic via slice path extension)
        with Timer() as timer:
            logger.info(f"Exporting slab {slab_index}/{len(slab_boxes)}", extra={"status": f"Exporting {slab_index}/{len(slab_boxes)}"})
            rt.foreach(write_slice, sliced_slab_wall.bricks)
        logger.info(f"Exporting slab {slab_index}/{len(slab_boxes)} took {timer.timedelta}", extra={"status": f"Done: {slab_index}/{len(slab_boxes)}"})

        # Discard slice data
        sliced_slab_wall.unpersist()
        del sliced_slab_wall

    logger.info(f"DONE exporting {len(slab_boxes)} slabs.", extra={'status': "DONE"})
def execute(self):
    """
    Benchmark raw block fetches from the configured input source.

    The input bounding box is divided into message-sized boxes. For each
    box, the corresponding blocks are requested (without decoding them),
    and the timestamp, voxel count, response size and duration of every
    request are collected and passed to ``self.dump_stats()``.
    """
    from pyspark import StorageLevel

    self._sanitize_config()
    config = self.config_data
    options = config["options"]

    resource_mgr_client = ResourceManagerClient(
        options["resource-server"], options["resource-port"])

    total_cpus = 16 * num_worker_nodes()
    if options["resource-server"]:
        # Concurrency is dictated by the resource manager,
        # which only works if the cluster can actually saturate it.
        concurrent_threads = options["resource-server-config"]["read_reqs"]
        if concurrent_threads > total_cpus:
            raise RuntimeError(
                "You're attempting to use the resource manager to constrain concurrency, but you "
                "aren't running with a large enough cluster to saturate the resource manager settings")
    else:
        concurrent_threads = total_cpus

    # The VolumeService is created only because it conveniently fills in
    # any missing config values. It is not used to fetch any data.
    volume_service = VolumeService.create_from_config(config["input"], self.config_dir)

    server = volume_service.server
    uuid = volume_service.uuid
    instance = volume_service.instance_name
    block_shape = 3 * (volume_service.block_width,)

    def timed_fetch_blocks_from_box(box):
        """
        Request all blocks within the given box and measure the request.
        The response is neither decompressed nor assembled into a volume.

        Returns:
            (timestamp, voxel_count, response_size, seconds)
        """
        assert not (box % block_shape).any(), "For this test, all requests must be block-aligned"

        # Starting corner of every block in the box, expressed as XYZ block indexes.
        block_starts_zyx = np.array(list(boxes_from_grid(box, Grid(block_shape))))[:, 0, :]
        block_coords_xyz = block_starts_zyx[:, ::-1] // block_shape
        block_coords_str = ','.join(str(coord) for coord in block_coords_xyz.flat)

        voxel_count = np.prod(box[1] - box[0])

        session = default_dvid_session()
        url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'

        with resource_mgr_client.access_context(server, True, 1, voxel_count):
            timestamp = datetime.now()
            with Timer() as timer:
                r = session.get(url)

        r.raise_for_status()
        return timestamp, voxel_count, len(r.content), timer.seconds

    boxes = list(clipped_boxes_from_grid(
        volume_service.bounding_box_zyx,
        Grid(volume_service.preferred_message_shape)))

    # Tag every (index, box) pair with its index as the hash,
    # so that Spark produces partitions of uniform length.
    indexed_boxes = []
    for index_and_box in enumerate(boxes):
        hashed_pair = rt.tuple_with_hash(index_and_box)
        hashed_pair.set_hash(hashed_pair[0])
        indexed_boxes.append(hashed_pair)

    fetch_stats = self.sc.parallelize(indexed_boxes).values().map(timed_fetch_blocks_from_box)

    # Persisted solely so the partition distribution shows up in the log.
    rt.persist_and_execute(fetch_stats, "Fetching blocks", logger, StorageLevel.MEMORY_ONLY) #@UndefinedVariable

    # Run the workload and split the per-request tuples into columns.
    timestamps, voxels, sizes, times = zip(*fetch_stats.collect())

    self.dump_stats(timestamps, voxels, sizes, times, block_shape, concurrent_threads)
def execute(self):
    """
    Copy raw labelmap blocks from the input instance to the output instance,
    scale by scale and slab by slab.

    Depending on the 'labelmapcopy' options, the copy may skip blocks that
    are already identical in the output, and the set of labels encountered
    at scale 0 may be recorded to 'recorded-labels.csv'.
    """
    self._init_services()

    options = self.config["labelmapcopy"]
    input_service = self.input_service
    output_service = self.output_service
    mgr_client = self.mgr_client

    record_labels = options["record-label-sets"]
    record_only = options["record-only"]
    check_existing = options["dont-overwrite-identical-blocks"]

    if record_only:
        only_scale_zero = (options["min-scale"] == 0 and options["max-scale"] == 0)
        assert only_scale_zero, \
            ("In record-only mode, there is no reason to process any scales other than 0. "
             "Set min-scale and max-scale to 0.")

    def labels_in(raw_blocks):
        # Set of all labels listed in the given raw block data.
        _spans, block_labels = parse_labelarray_data(raw_blocks, extract_labels=True)
        return set(chain(*block_labels.values()))

    def fetch_raw_blocks(service, box, scale, num_voxels):
        with mgr_client.access_context(service.server, True, 1, num_voxels):
            return fetch_labelmap_voxels(
                *service.instance_triple, box, scale, False,
                service.supervoxels, format='raw-response')

    def post_raw_blocks(raw_blocks, scale, num_voxels):
        with mgr_client.access_context(output_service.server, False, 1, num_voxels):
            post_labelmap_blocks(
                *output_service.instance_triple, None, raw_blocks, scale,
                output_service.enable_downres, output_service.disable_indexing,
                False, is_raw=True)

    def copy_box(box, scale):
        """
        Copy (or merely inspect) the blocks of one box at the given scale.
        Returns the list of labels to record for this box (possibly empty).
        """
        assert not record_only or scale == 0
        box = round_box(box, 64, 'out')
        num_voxels = np.prod(box[1] - box[0])
        want_labels = (scale == 0 and record_labels)

        input_raw_blocks = fetch_raw_blocks(input_service, box, scale, num_voxels)

        # Record-only mode: nothing is written.
        if scale == 0 and record_only:
            return list(labels_in(input_raw_blocks))

        # Unconditional copy: the output is not inspected at all.
        if not check_existing:
            post_raw_blocks(input_raw_blocks, scale, num_voxels)
            if want_labels:
                return list(labels_in(input_raw_blocks))
            return []

        output_raw_blocks = fetch_raw_blocks(output_service, box, scale, num_voxels)

        # Identical payloads need no parsing and no writing.
        if (input_raw_blocks == output_raw_blocks):
            return []

        input_spans = parse_labelarray_data(input_raw_blocks, extract_labels=False)
        output_spans = parse_labelarray_data(output_raw_blocks, extract_labels=False)

        input_ids = set(input_spans)
        output_ids = set(output_spans)

        for block_id in output_ids - input_ids:
            # FIXME: We should pass this in the result so it can be logged in the client, not the worker.
            logger.error(f"Not overwriting block-id: {block_id}. It doesn't exist in the input.")

        def block_bytes(raw_blocks, spans, block_id):
            start, stop = spans[block_id]
            return raw_blocks[start:stop]

        # Blocks which don't exist in the output yet are always sent.
        changed_input = [
            (block_id, block_bytes(input_raw_blocks, input_spans, block_id))
            for block_id in input_ids - output_ids
        ]

        # Blocks which exist on both sides are sent only if their bytes differ.
        changed_output = []
        for block_id in input_ids & output_ids:
            in_buf = block_bytes(input_raw_blocks, input_spans, block_id)
            out_buf = block_bytes(output_raw_blocks, output_spans, block_id)
            if in_buf == out_buf:
                continue
            changed_input.append((block_id, in_buf))
            changed_output.append((block_id, out_buf))

        # Restore the order in which the blocks were received.
        changed_input.sort(key=lambda id_and_buf: input_spans[id_and_buf[0]][0])

        filtered_input_buf = b''.join(buf for (_, buf) in changed_input)
        post_raw_blocks(filtered_input_buf, scale, num_voxels)

        if not want_labels:
            return []

        # Only labels that are new relative to the overwritten blocks are reported.
        filtered_output_buf = b''.join(buf for (_, buf) in changed_output)
        return list(labels_in(filtered_input_buf) - labels_in(filtered_output_buf))

    all_labels = set()
    try:
        for scale in range(options["min-scale"], 1 + options["max-scale"]):
            scaled_bounding_box = input_service.bounding_box_zyx // (2**scale)
            slab_boxes = clipped_boxes_from_grid(scaled_bounding_box, options["slab-shape"][::-1])
            logger.info(f"Scale {scale}: Copying {len(slab_boxes)} slabs")

            for slab_index, slab_box in enumerate(slab_boxes):
                brick_boxes = clipped_boxes_from_grid(
                    slab_box, Grid(self.input_service.preferred_message_shape))
                with Timer(f"Scale {scale} slab {slab_index}: Copying {slab_box[:,::-1].tolist()} ({len(brick_boxes)} bricks)", logger):
                    brick_labels = (db.from_sequence(brick_boxes)
                                      .map(lambda box: copy_box(box, scale))
                                      .compute())
                    all_labels.update(chain(*brick_labels))
    finally:
        # Whatever was collected is written out, even if the copy failed part-way.
        if record_labels:
            name = 'sv' if input_service.supervoxels else 'body'
            label_series = pd.Series(sorted(all_labels), name=name)
            label_series.to_csv('recorded-labels.csv', index=False, header=True)
def from_volume_service(cls, volume_service, scale=0, bounding_box_zyx=None, client=None,
                        target_partition_size_voxels=None, halo=0, sparse_block_mask=None,
                        lazy=False, compression=None):
    """
    Convenience constructor: build a BrickWall whose bricks are
    fetched from a VolumeService.

    Args:
        volume_service:
            An instance of a VolumeService.

        scale:
            The scale at which brick data is fetched.
            (Note: Bricks keep the full volume_service.preferred_message_shape,
            but the overall bounding box of the BrickWall is scaled down.)

        bounding_box_zyx:
            (start, stop) Optional. Restricts the region of fetched blocks.
            Always given in FULL-RES coordinates, even when scale > 0.
            Defaults to volume_service.bounding_box_zyx.

        client:
            dask distributed.Client

        target_partition_size_voxels:
            Optional. If given, the number of bricks per partition is chosen
            such that each partition holds (approximately) this many voxels.

        halo:
            If given, the grid used to fetch the data includes this halo.
            Depending on the use-case and/or input source, this can be faster
            than adding a halo afterwards, which requires shuffling data
            across the cluster.

        sparse_block_mask:
            Instance of SparseBlockMask. Only supported with scale == 0.

        lazy:
            If True, brick data is not created until the brick's
            'volume' member is first accessed.

        compression:
            If given, brick volume data is serialized/stored in a compressed format.
            See ``flyemflows.util.compressed_volume.COMPRESSION_METHODS``

    Raises:
        RuntimeError: if the sparse_block_mask selects no blocks.
    """
    grid = Grid(volume_service.preferred_message_shape, (0, 0, 0), halo)

    if bounding_box_zyx is None:
        bounding_box_zyx = volume_service.bounding_box_zyx
    full_res_box = np.asarray(bounding_box_zyx)

    if scale == 0:
        downsampled_box = full_res_box
    else:
        # A proper downsampled bounding box would round the upper corner UP:
        #   (full_res_box[1] + 2**scale - 1) // 2**scale
        # ...but some services probably don't do that, so BOTH corners are
        # rounded down to avoid out-of-bounds errors at higher scales.
        downsampled_box = np.zeros((2, 3), dtype=int)
        for corner in (0, 1):
            downsampled_box[corner] = full_res_box[corner] // 2**scale

    sparse_boxes = None
    if sparse_block_mask is not None:
        # FIXME:
        #   generate_bricks_from_volume_source() would be much quicker if
        #   SparseBlockMask offered a faster way to filter boxes, and if that
        #   were called here. As it stands, workflows that process data in
        #   "slabs" pass the same SparseBlockMask for every slab, and it is
        #   processed from scratch to filter boxes for each slab's bounding box.
        assert isinstance(sparse_block_mask, SparseBlockMask)
        assert scale == 0, "FIXME: I don't think the sparse feature works with scales other than 0."

        sparse_boxes = sparse_block_mask.sparse_boxes(grid)
        if len(sparse_boxes) == 0:
            # Some workflows check for this message; if you change it, change those checks!
            raise RuntimeError("SparseBlockMask selects no blocks at all!")

    def fetch_subvolume(box):
        return volume_service.get_subvolume(box, scale)

    return BrickWall.from_accessor_func(
        downsampled_box,
        grid,
        fetch_subvolume,
        client,
        target_partition_size_voxels,
        sparse_boxes,
        lazy,
        compression=compression)
def execute(self):
    """
    Generate a stitched mesh for every body in the configured body list.

    For each body, the sparse block mask is fetched, converted to bricks,
    realigned to a 64x64x64 grid (with the configured halo), meshed
    brick-by-brick, then combined (optionally stitched), smoothed, decimated,
    and serialized to ``<output-directory>/<body>.<format>``.

    At most ``options["concurrent-bodies"]`` bodies are in flight at once.
    A per-body summary is always written to 'mesh-stats.csv', even if
    processing is interrupted by an exception.

    Raises:
        Any exception raised by a body's task, if ``options["error-mode"]``
        is "raise". Otherwise, failures are recorded in the stats and logged.
    """
    self._init_service()
    mgr_client = self.mgr_client

    options = self.config["stitchedmeshes"]

    server, uuid, instance = self.input_service.base_service.instance_triple
    is_supervoxels = self.input_service.base_service.supervoxels
    bodies = load_body_list(options["bodies"], is_supervoxels)
    logger.info(f"Input is {len(bodies)} bodies")

    os.makedirs(options["output-directory"], exist_ok=True)

    USER = getpass.getuser()

    # NOTE:
    #   The task functions below receive the body ID as an explicit argument.
    #   They must not read it from the enclosing loop variable: tasks are
    #   evaluated lazily, and by the time a task runs, the loop may already
    #   have moved on to a different body.

    def fetch_sparsevol(body):
        with mgr_client.access_context(server, True, 1, 0):
            ns = default_node_service(server, uuid, 'flyemflows-stitchedmeshes', USER)
            coords_zyx, blocks = ns.get_sparselabelmask(body, instance, options["scale"], is_supervoxels)
            return list(coords_zyx.copy()), list(blocks.copy())

    def make_bricks(coord_and_block):
        coord_zyx, block_vol = coord_and_block
        logical_box = np.array((coord_zyx, coord_zyx + block_vol.shape))
        return Brick(logical_box, logical_box, block_vol, location_id=(logical_box // 64))

    rescale = (2**options["scale"]) * options["extra-rescale"]

    def create_brick_mesh(brick):
        mesh = Mesh.from_binary_vol(brick.volume, brick.physical_box)
        if rescale != 1.0:
            mesh.vertices_zyx *= rescale
        return mesh

    def create_combined_mesh(meshes):
        mesh = concatenate_meshes(meshes, False)
        if options["stitch"]:
            mesh.stitch_adjacent_faces(drop_unused_vertices=True, drop_duplicate_faces=True)
        mesh.laplacian_smooth(options["smoothing-iterations"])
        mesh.simplify(options["decimation-fraction"], in_memory=True)
        return mesh

    def write_mesh(mesh, body):
        output_dir = options["output-directory"]
        fmt = options["format"]
        output_path = f'{output_dir}/{body}.{fmt}'
        mesh.serialize(output_path)
        return (body, len(mesh.vertices_zyx), 'success', '')

    in_flight = 0

    # Support synchronous testing with a fake 'as_completed' object
    if hasattr(self.client, 'DEBUG'):
        result_futures = as_completed_synchronous()
    else:
        result_futures = as_completed()

    def pop_result():
        """
        Wait for the next finished task and return its result tuple:
        (body, vertices, result, msg)
        """
        nonlocal in_flight
        r = next(result_futures)
        in_flight -= 1

        try:
            return r.result()
        except Exception as ex:
            if options["error-mode"] == "raise":
                raise
            # The task key is the body ID (see dask_key_name below).
            failed_body = int(r.key)
            return (failed_body, 0, 'error', str(ex))

    results = []

    def record_next_result():
        # Uses its own name for the finished body, so the ID of the body
        # currently being submitted is never overwritten.
        finished_body, vertices, result, msg = pop_result()
        if result == "error":
            logger.warning(f"Body {finished_body}: Failed to generate mesh: {msg}")
        results.append((finished_body, vertices, result, msg))

    try:
        for i, body in enumerate(bodies):
            logger.info(f"Mesh #{i}: Body {body}: Starting")

            # This leaves all blocks and bricks in a single partition,
            # but we're about to do a shuffle anyway when the bricks are realigned.
            coords, blocks = delayed(fetch_sparsevol, nout=2)(body)
            coords, blocks = db.from_delayed(coords), db.from_delayed(blocks)
            bricks = db.zip(coords, blocks).map(make_bricks)

            mesh_grid = Grid((64, 64, 64), halo=options["block-halo"])
            wall = BrickWall(None, (64, 64, 64), bricks)
            wall = wall.realign_to_new_grid(mesh_grid)

            brick_meshes = wall.bricks.map(create_brick_mesh)
            consolidated_brick_meshes = brick_meshes.repartition(1)
            combined_mesh = delayed(create_combined_mesh)(consolidated_brick_meshes)

            # We hide the body ID in the task name, so that we can record it in pop_result
            task = delayed(write_mesh)(combined_mesh, body, dask_key_name=f'{body}')

            result_futures.add(self.client.compute(task))
            in_flight += 1

            # Throttle: don't submit more bodies until one has finished.
            assert in_flight <= options["concurrent-bodies"]
            while in_flight == options["concurrent-bodies"]:
                record_next_result()

        # Flush the last batch of tasks
        while in_flight > 0:
            record_next_result()
    finally:
        stats_df = pd.DataFrame(results, columns=['body', 'vertices', 'result', 'msg'])
        stats_df.to_csv('mesh-stats.csv', index=False, header=True)

        failed_df = stats_df.query("result != 'success'")
        if len(failed_df) > 0:
            logger.warning(f"Failed to create meshes for {len(failed_df)} bodies. See mesh-stats.csv")
def test_extract_halos():
    """
    Check extract_halos() for both the 'outer' and 'inner' halo types,
    using bricks generated over a bounding box that is not grid-aligned.
    """
    halo = 1
    grid = Grid((10, 20), (0, 0), halo)
    bounding_box = np.array([(15, 30), (95, 290)])
    volume = np.random.randint(0, 10, (100, 300))

    bricks, _num_bricks = generate_bricks_from_volume_source(
        bounding_box, grid, partial(extract_subvol, volume), DebugClient())

    halos_by_type = {
        'outer': extract_halos(bricks, grid, 'outer').compute(),
        'inner': extract_halos(bricks, grid, 'inner').compute(),
    }

    for halo_type, halo_bricks in halos_by_type.items():
        for hb in halo_bricks:
            # Each halo keeps the logical box of the brick it was taken from.
            # Even for bricks on the volume edge (whose physical boxes are
            # smaller than their logical boxes), the halo's physical box
            # matches that logical box everywhere except along the halo axis.
            box_mismatch = (hb.physical_box != hb.logical_box)
            if halo_type == 'outer':
                assert box_mismatch[0].sum() == 1
                assert box_mismatch[1].sum() == 1
            else:
                assert box_mismatch.sum() == 1

            # Because the bounding box is not grid-aligned, edge bricks hold
            # only partial data. Halos are nonetheless sized according to the
            # logical box, and zero-padded where no data exists.
            # So: the valid part must equal the source volume...
            valid_box = box_intersection(bounding_box, hb.physical_box)
            valid_box_in_halo = valid_box - hb.physical_box[0]
            valid_halo_vol = extract_subvol(hb.volume, valid_box_in_halo)
            assert (valid_halo_vol == extract_subvol(volume, valid_box)).all()

            # ...and everything outside the valid part must be zero.
            padding_vol = hb.volume.copy()
            overwrite_subvol(padding_vol, valid_box_in_halo, 0)
            assert (padding_vol == 0).all()

    box_columns = ['y0', 'x0', 'y1', 'x1']
    rows = [
        [*hb.physical_box.flat, hb, halo_type]
        for halo_type, halo_bricks in halos_by_type.items()
        for hb in halo_bricks
    ]
    halo_df = pd.DataFrame(rows, columns=[*box_columns, 'brick', 'halo_type'])

    # All physical boxes are clipped to the overall bounding box, so each
    # outer halo must be matched by an inner halo from a neighboring brick.
    # (Not necessarily true for Bricks initialized from a sparse mask.)
    halo_counts = halo_df.groupby(box_columns).size()
    assert halo_counts.min() == 2
    assert halo_counts.max() == 2

    for _box, halos_df in halo_df.groupby(box_columns):
        assert set(halos_df['halo_type']) == {'outer', 'inner'}

        paired_bricks = halos_df['brick']
        brick0, brick1 = paired_bricks.iloc[0], paired_bricks.iloc[1]
        assert (brick0.volume == brick1.volume).all()