def test_sparse_boxes_WITH_OFFSET():
    """
    Check SparseBlockMask.sparse_boxes() for a mask whose box starts at a
    non-zero offset, using (10, 10, 30) bricks on a grid anchored at the origin.
    """
    resolution = 10
    mask = np.zeros((5, 6, 7), dtype=bool)

    # Row 0: because the mask is offset by 20 in X, this run touches 3 bricks
    # (physical: 20-70, logical: 0-90)
    mask[0, 0, 0:5] = True

    # Row 1: this run fits within a single brick
    # (physical: 30-60, logical: 30-60)
    mask[0, 1, 1:4] = True

    # The mask box does NOT start at the origin.
    box_start = np.array([0, 10, 20])
    box_stop = box_start + 10 * np.array(mask.shape)

    grid = Grid((10, 10, 30), (0, 0, 0))
    sbm = SparseBlockMask(mask, (box_start, box_stop), resolution)

    expected_logical = [
        [[0, 10, 0], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 60]],
        [[0, 10, 60], [10, 20, 90]],
        [[0, 20, 30], [10, 30, 60]],
    ]
    expected_physical = [
        [[0, 10, 20], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 60]],
        [[0, 10, 60], [10, 20, 70]],
        [[0, 20, 30], [10, 30, 60]],
    ]

    logical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=True)
    assert (logical_boxes == expected_logical).all()

    physical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=False)
    assert (physical_boxes == expected_physical).all()
def test_sparse_boxes_NO_OFFSET():
    """
    Check SparseBlockMask.sparse_boxes() for a mask whose box starts at the
    origin, using (10, 10, 30) bricks on a default (un-offset) grid.
    """
    resolution = 10
    mask = np.zeros((5, 6, 7), dtype=bool)
    mask[0, 0, 0:5] = True
    mask[0, 1, 1:4] = True

    # The mask box starts at the origin (no offset).
    box_start = np.array([0, 0, 0])
    box_stop = box_start + 10 * np.array(mask.shape)

    grid = Grid((10, 10, 30))
    sbm = SparseBlockMask(mask, (box_start, box_stop), resolution)

    expected_logical = [
        [[0, 0, 0], [10, 10, 30]],
        [[0, 0, 30], [10, 10, 60]],
        [[0, 10, 0], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 60]],
    ]
    expected_physical = [
        [[0, 0, 0], [10, 10, 30]],
        [[0, 0, 30], [10, 10, 50]],
        [[0, 10, 10], [10, 20, 30]],
        [[0, 10, 30], [10, 20, 40]],
    ]

    logical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=True)
    assert (logical_boxes == expected_logical).all()

    physical_boxes = sbm.sparse_boxes(grid, return_logical_boxes=False)
    assert (physical_boxes == expected_physical).all()
def init_boxes(self, volume_service, roi):
    """
    Return the brick bounding boxes to process, optionally restricted to an ROI.

    If ``roi["name"]`` is empty, the service's entire bounding box is tiled
    with its preferred message shape. Otherwise the ROI mask is fetched and
    only the bricks selected by the resulting SparseBlockMask are returned,
    clipped to the service's true (un-rounded) bounding box.

    Args:
        volume_service:
            The input volume service.
        roi:
            dict with keys "name", "server", "uuid" and "scale".
            Missing "server"/"uuid" entries are filled in from the volume
            service, and the dict is modified in place.

    Returns:
        Array of boxes, indexed as ``boxes[i, 0]`` (start) and ``boxes[i, 1]`` (stop).
    """
    if not roi["name"]:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                volume_service.preferred_message_shape,
                                clipped=True)
        return np.array([*boxes])

    base_service = volume_service.base_service

    if not roi["server"] or not roi["uuid"]:
        assert isinstance(base_service, DvidVolumeService), \
            "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

    roi["server"] = (roi["server"] or volume_service.server)
    roi["uuid"] = (roi["uuid"] or volume_service.uuid)

    if roi["scale"] is not None:
        scale = roi["scale"]
    elif isinstance(volume_service, ScaledVolumeService):
        scale = volume_service.scale_delta
    else:
        scale = 0

    # Validate the scale regardless of where it came from: the arithmetic below
    # relies on 2**(5-scale) being a positive integer, which also rules out a
    # user-supplied roi["scale"] greater than 5.
    assert scale <= 5, \
        "The 'roi' option doesn't support volumes downscaled beyond level 5"

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % 2**(5-scale)).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    seg_box = volume_service.bounding_box_zyx
    seg_box = round_box(seg_box, 2**(5 - scale))
    seg_box_s0 = seg_box * 2**scale        # only used for the log message
    seg_box_s5 = seg_box // 2**(5 - scale)

    with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
        roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                   format='mask', mask_box=seg_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5 - scale))
    boxes = sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def init_boxes(self, volume_service, roi):
    """
    Return the brick bounding boxes to process.

    Without an ROI, the service's whole bounding box is tiled with its
    preferred message shape. With an ROI (a name, fetched from the same
    DVID server/uuid as the input), only bricks selected by the ROI mask are
    returned, clipped to the service's true (un-rounded) bounding box.
    """
    if not roi:
        grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                     volume_service.preferred_message_shape,
                                     clipped=True)
        return np.array(list(grid_boxes))

    base_service = volume_service.base_service
    assert isinstance(base_service, DvidVolumeService), \
        "Can't specify an ROI unless you're using a dvid input"

    assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
        "The 'roi' option doesn't support adapters other than 'rescale-level'"

    if isinstance(volume_service, ScaledVolumeService):
        scale = volume_service.scale_delta
        assert scale <= 5, \
            "The 'roi' option doesn't support volumes downscaled beyond level 5"
    else:
        scale = 0

    server, uuid, _seg_instance = base_service.instance_triple

    # Width of one ROI (scale-5) voxel, measured in the service's own voxels.
    roi_voxel_width = 2**(5 - scale)

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % roi_voxel_width).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    rounded_box = round_box(volume_service.bounding_box_zyx, roi_voxel_width)
    rounded_box_s0 = rounded_box * 2**scale
    rounded_box_s5 = rounded_box // roi_voxel_width

    timer_msg = f"Fetching mask for ROI '{roi}' ({rounded_box_s0[:, ::-1].tolist()})"
    with Timer(timer_msg, logger):
        roi_mask_s5, _ = fetch_roi(server, uuid, roi, format='mask', mask_box=rounded_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    roi_sbm = SparseBlockMask(roi_mask_s5, rounded_box, roi_voxel_width)
    boxes = roi_sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def test_get_fullres_mask():
    """
    Check SparseBlockMask.get_fullres_mask() against an upsampled copy of a
    random low-res mask, both for the exact mask box and for a box that
    extends beyond it.
    """
    lowres = np.random.randint(2, size=(10, 10), dtype=bool)
    fullres = upsample(lowres, 10)

    sbm = SparseBlockMask(lowres, [(0, 0), (100, 100)], (10, 10))

    # Request exactly the mask's own bounding box.
    exact = sbm.get_fullres_mask([(0, 0), (100, 100)])
    assert (exact == fullres).all()

    # Request a box that runs past the original mask;
    # everything outside the mask must come back as zeros.
    oversized = sbm.get_fullres_mask([(10, 20), (150, 150)])
    assert oversized.shape == (140, 130)

    padded = np.zeros((140, 130), dtype=bool)
    padded[:90, :80] = fullres[10:, 20:]
    assert (oversized == padded).all()
def sparse_block_mask_for_labels(self, labels, clip=True):
    """
    Determine which bricks (each with our ``preferred_message_shape``)
    would need to be accessed to download all data for the given labels,
    and return the result as a ``SparseBlockMask`` object.

    The brick coordinates come from ``sparse_brick_coords_for_labels()``;
    see that method for how they are fetched.

    Args:
        labels:
            A list of body IDs (if ``self.supervoxels`` is False),
            or supervoxel IDs (if ``self.supervoxels`` is True).

        clip:
            Forwarded to ``sparse_brick_coords_for_labels()``.
            If True, filter the results to exclude any coordinates
            that fall outside this service's bounding-box.
            Otherwise, all brick coordinates that encompass the given labels
            will be returned, whether or not they fall within the bounding box.

    Returns:
        ``SparseBlockMask``
    """
    from neuclease.util import SparseBlockMask

    zyx = ['z', 'y', 'x']

    brick_coords_df = self.sparse_brick_coords_for_labels(labels, clip)
    brick_coords_df.drop_duplicates(subset=zyx, inplace=True)

    # Convert voxel coordinates to brick indexes.
    shape = self.preferred_message_shape
    brick_coords_df[zyx] //= shape

    brick_indexes = brick_coords_df[zyx].values
    return SparseBlockMask.create_from_lowres_coords(brick_indexes, shape)
def init_boxes(self, volume_service, roi, chunk_shape_s0):
    """
    Return a set of bounding boxes to tile the given ROI.

    Scale 0 of the volume service should correspond to full-res data,
    which is 32x higher-res than ROI resolution.

    If ``roi["name"]`` is empty, the whole bounding box of the service is
    tiled with ``chunk_shape_s0`` instead. Missing ``roi["server"]`` /
    ``roi["uuid"]`` entries are filled in (in place) from the volume service.
    """
    if not roi["name"]:
        grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx, chunk_shape_s0, clipped=True)
        return np.array(list(grid_boxes))

    base_service = volume_service.base_service

    if not (roi["server"] and roi["uuid"]):
        assert isinstance(base_service, DvidVolumeService), \
            "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

    roi["server"] = roi["server"] or volume_service.server
    roi["uuid"] = roi["uuid"] or volume_service.uuid

    # One ROI voxel spans 2**5 scale-0 voxels along each axis.
    roi_voxel_width = 2**5

    assert not (chunk_shape_s0 % roi_voxel_width).any(), \
        "If using an ROI, select a chunk shape that is divisible by 32"

    rounded_box_s0 = round_box(volume_service.bounding_box_zyx, roi_voxel_width)
    rounded_box_s5 = rounded_box_s0 // roi_voxel_width

    timer_msg = f"Fetching mask for ROI '{roi['name']}' ({rounded_box_s0[:, ::-1].tolist()})"
    with Timer(timer_msg, logger):
        roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                   format='mask', mask_box=rounded_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    roi_sbm = SparseBlockMask(roi_mask_s5, rounded_box_s0, roi_voxel_width)
    boxes = roi_sbm.sparse_boxes(chunk_shape_s0)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def init_brickwall(self, volume_service, subset_labels, roi):
    """
    Construct the BrickWall for the input volume, optionally made sparse.

    The wall is restricted by a SparseBlockMask derived from, in order of
    precedence, the ROI (if ``roi["name"]`` is set) or the subset labels
    (if any, and if the service supports sparse fetching). Otherwise the
    wall is dense.

    Args:
        volume_service:
            The input volume service.
        subset_labels:
            Collection of labels to restrict the wall to (may be empty).
        roi:
            dict with keys "name", "server", "uuid" and "scale".
            Missing "server"/"uuid" entries are filled in from the volume
            service, and the dict is modified in place.

    Returns:
        BrickWall
    """
    sbm = None

    if roi["name"]:
        base_service = volume_service.base_service

        if not roi["server"] or not roi["uuid"]:
            assert isinstance(base_service, DvidVolumeService), \
                "Since you aren't using a DVID input source, you must specify the ROI server and uuid."

        roi["server"] = (roi["server"] or volume_service.server)
        roi["uuid"] = (roi["uuid"] or volume_service.uuid)

        if roi["scale"] is not None:
            scale = roi["scale"]
        elif isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
        else:
            scale = 0

        # Validate the scale regardless of where it came from: the arithmetic
        # below relies on 2**(5-scale) being a positive integer, which also
        # rules out a user-supplied roi["scale"] greater than 5.
        assert scale <= 5, \
            "The 'roi' option doesn't support volumes downscaled beyond level 5"

        brick_shape = volume_service.preferred_message_shape
        assert not (brick_shape % 2**(5-scale)).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, 2**(5-scale))
        seg_box_s0 = seg_box * 2**scale        # only used for the log message
        seg_box_s5 = seg_box // 2**(5-scale)

        with Timer(f"Fetching mask for ROI '{roi['name']}' ({seg_box_s0[:, ::-1].tolist()})", logger):
            roi_mask_s5, _ = fetch_roi(roi["server"], roi["uuid"], roi["name"],
                                       format='mask', mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask(roi_mask_s5, seg_box, 2**(5-scale))

    elif subset_labels:
        try:
            sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
            if ((sbm.box[1] - sbm.box[0]) == 0).any():
                raise RuntimeError("Could not find sparse masks for any of the subset-labels")
        except NotImplementedError:
            # The service can't fetch sparsely; fall back to a dense wall.
            sbm = None

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes

        # Apply halo WHILE downloading the data.
        # TODO: Allow the user to configure whether or not the halo should
        #       be fetched from the outset, or added after the blocks are loaded.
        halo = self.config["connectedcomponents"]["halo"]
        brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client,
                                                  target_partition_size_voxels, halo, sbm,
                                                  compression='lz4_2x')

    return brickwall
def init_boxes(self, volume_service, subset_labels, roi):
    """
    Return the brick bounding boxes to process.

    The boxes are restricted by a SparseBlockMask derived from the ROI
    (if given) or else from the subset labels (if any, and if the service
    supports sparse fetching). If neither yields a mask, the service's whole
    bounding box is tiled with its preferred message shape.

    Args:
        volume_service:
            The input volume service.
        subset_labels:
            Collection of labels to restrict the boxes to (may be empty).
        roi:
            Name of a DVID ROI, or a falsy value for no ROI.

    Returns:
        The sparse boxes from the mask, or an array of dense grid boxes.
    """
    sbm = None

    # Needed by every path below. Previously this was assigned only inside
    # the ROI branch, so the subset-labels path failed with a NameError.
    brick_shape = volume_service.preferred_message_shape

    if roi:
        base_service = volume_service.base_service
        assert isinstance(base_service, DvidVolumeService), \
            "Can't specify an ROI unless you're using a dvid input"

        assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
            "The 'roi' option doesn't support adapters other than 'rescale-level'"

        scale = 0
        if isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"

        server, uuid, _seg_instance = base_service.instance_triple

        assert not (brick_shape % 2**(5-scale)).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, brick_shape)
        seg_box_s0 = seg_box * 2**scale        # only used for the log message
        seg_box_s5 = seg_box // 2**(5 - scale)

        with Timer(f"Fetching mask for ROI '{roi}' ({seg_box_s0[:, ::-1].tolist()})", logger):
            roi_mask_s5, _ = fetch_roi(server, uuid, roi, format='mask', mask_box=seg_box_s5)

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask.create_from_highres_mask(
            roi_mask_s5, 2**(5 - scale), seg_box, brick_shape)

    elif subset_labels:
        try:
            sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
            if ((sbm.box[1] - sbm.box[0]) == 0).any():
                raise RuntimeError("Could not find sparse masks for any of the subset-labels")
        except NotImplementedError:
            # The service can't fetch sparsely; fall back to dense boxes.
            sbm = None

    if sbm is None:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx, brick_shape, clipped=True)
        return np.array([*boxes])

    return sbm.sparse_boxes(brick_shape)
def _get_sparse_block_mask(self, volume_service):
    """
    Build a SparseBlockMask covering the bodies listed in the config's
    'subset-bodies' setting, so that only the relevant blocks are read.

    Returns None (meaning: read every segmentation block) when no
    'subset-bodies' were configured, or when the input volume is not backed
    by a DvidVolumeService. In the latter case the 'subset-bodies' feature
    still works, just less efficiently.
    """
    import pandas as pd

    config = self.config_data
    sparse_body_ids = config["mesh-config"]["storage"]["subset-bodies"]
    if not sparse_body_ids:
        return None

    # We only know how to retrieve sparse blocks for DVID volumes.
    # For other volume sources, we'll just have to fetch everything and filter
    # out the unwanted bodies at the mask aggregation step.
    if not isinstance(volume_service.base_service, DvidVolumeService):
        return None

    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    assert grouping_scheme in ('no-groups', 'singletons', 'labelmap'), \
        f"Not allowed to use 'subset-bodies' setting for grouping scheme: {grouping_scheme}"

    if grouping_scheme == 'labelmap':
        # Body ids must be translated into the segment ids that map onto them.
        mapping_pairs = self.load_labelmap()
        segments, bodies = mapping_pairs.transpose()

        # A Series index may contain duplicates, so a single .loc lookup
        # yields every segment belonging to each requested body.
        body_to_segments = pd.Series(index=bodies, data=segments)
        sparse_segment_ids = body_to_segments.loc[sparse_body_ids].values
    elif grouping_scheme in ('no-groups', 'singletons'):
        # Without grouping, body ids and segment ids are one and the same.
        sparse_segment_ids = sparse_body_ids

    # Ask for the union of blocks occupied by the chosen segments.
    dvid_service = volume_service.base_service
    mask_info = sparkdvid.get_union_block_mask_for_bodies(
        dvid_service.server,
        dvid_service.uuid,
        dvid_service.instance_name,
        sparse_segment_ids)
    block_mask, lowres_box, block_shape = mask_info

    fullres_box = lowres_box * block_shape
    return SparseBlockMask(block_mask, fullres_box, block_shape)
def init_boxes(self, volume_service, roi):
    """
    Return the brick bounding boxes to process.

    If ``roi["name"]`` is empty, the service's whole bounding box is tiled
    with its preferred message shape. Otherwise the ROI mask is fetched
    (at ``roi["relative-scale"]`` relative to the service's voxels) and only
    the bricks it selects are returned, clipped to the service's true
    (un-rounded) bounding box.
    """
    if not roi["name"]:
        grid_boxes = boxes_from_grid(volume_service.bounding_box_zyx,
                                     volume_service.preferred_message_shape,
                                     clipped=True)
        return np.array(list(grid_boxes))

    server = roi["server"]
    uuid = roi["uuid"]
    roi_name = roi["name"]
    roi_scale = roi["relative-scale"]

    # Width of one ROI voxel, measured in the service's own voxels.
    roi_voxel_width = 2**roi_scale

    brick_shape = volume_service.preferred_message_shape
    assert not (brick_shape % roi_voxel_width).any(), \
        "If using an ROI, select a brick shape that is divisible by 32"

    rounded_box = round_box(volume_service.bounding_box_zyx, roi_voxel_width)
    rounded_box_s5 = rounded_box // roi_voxel_width

    timer_msg = f"Fetching mask for ROI '{roi_name}' ({rounded_box[:, ::-1].tolist()})"
    with Timer(timer_msg, logger):
        roi_mask_s5, _ = fetch_roi(server, uuid, roi_name, format='mask', mask_box=rounded_box_s5)

    # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
    roi_sbm = SparseBlockMask(roi_mask_s5, rounded_box, roi_voxel_width)
    boxes = roi_sbm.sparse_boxes(brick_shape)

    # Clip boxes to the true (not rounded) bounding box
    boxes[:, 0] = np.maximum(boxes[:, 0], volume_service.bounding_box_zyx[0])
    boxes[:, 1] = np.minimum(boxes[:, 1], volume_service.bounding_box_zyx[1])
    return boxes
def init_brickwall(self, volume_service, subset_groups):
    """
    Construct the BrickWall for the input volume.

    If the volume service can report which bricks contain the given label
    groups, the wall is restricted to those bricks (and the brick coordinates
    are also saved to 'brick-coords.npy' in the current directory).
    Otherwise a dense wall is built.
    """
    try:
        coords_df = volume_service.sparse_brick_coords_for_label_groups(subset_groups)
        np.save('brick-coords.npy', coords_df.to_records(index=False))

        shape = volume_service.preferred_message_shape
        coords_zyx = coords_df[['z', 'y', 'x']].values
        sbm = SparseBlockMask.create_from_lowres_coords(coords_zyx // shape, shape)
    except NotImplementedError:
        logger.warning(
            "The volume service does not support sparse fetching. All bricks will be analyzed.")
        sbm = None

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        voxels_per_partition = 2 * GB // np.uint64().nbytes

        # Apply halo WHILE downloading the data.
        # TODO: Allow the user to configure whether or not the halo should
        #       be fetched from the outset, or added after the blocks are loaded.
        halo = self.config["findadjacencies"]["halo"]
        wall = BrickWall.from_volume_service(
            volume_service,
            0,
            None,
            self.client,
            voxels_per_partition,
            halo,
            sbm,
            compression='lz4_2x')

    return wall
def test_copysegmentation_from_hdf5_to_dvid_custom_sbm(
        setup_hdf5_segmentation_input, disable_auto_retry):
    """
    Run the copy with a user-supplied (pickled) SparseBlockMask and verify
    that everything outside the mask is zero in the output.
    """
    (template_dir, config, volume,
     dvid_address, repo_uuid, output_segmentation_name) = setup_hdf5_segmentation_input

    # Select two 64-voxel-wide bands along the last axis, at 64 and 192.
    full_mask = np.zeros(volume.shape, bool)
    for band_start in (64, 192):
        full_mask[:, :, band_start:band_start + 64] = True

    # One low-res mask voxel per (64, 64, 64) block.
    lowres_mask = full_mask[::64, ::64, ::64]
    sbm = SparseBlockMask(lowres_mask, [(0, 0, 0), volume.shape], (64, 64, 64))

    with open(f"{template_dir}/sbm.pkl", 'wb') as f:
        pickle.dump(sbm, f)
    config["copysegmentation"]["sparse-block-mask"] = f"{template_dir}/sbm.pkl"

    setup = (template_dir, config, volume,
             dvid_address, repo_uuid, output_segmentation_name)
    box_zyx, expected_vol, output_vol = _run_to_dvid(setup, check_scale_0=False)

    # Zero out everything outside the mask in our own copy of the expected data.
    expected_vol = expected_vol.copy()
    box_mask = full_mask[box_to_slicing(*box_zyx)]
    expected_vol[~box_mask] = 0

    assert (output_vol == expected_vol).all()
def _process_slab(self, slab_index, output_slab_box):
    """
    (The main work of this file.)

    Process a large slab of voxels:

    1. Read a 'slab' of bricks from the input as a BrickWall
    2. Translate it to the output coordinates.
    3. Splice & group the bricks so that they are aligned to the optimal output grid
    4. 'Pad' the bricks on the edges of the wall by *reading* data from the output destination,
       so that all bricks are complete (i.e. they completely fill their grid block).
    5. Write all bricks to the output destination.
    6. Downsample the bricks and repeat steps 3-5 for the downsampled scale.

    Args:
        slab_index:
            Identifier of the slab, used in log/timer messages.
        output_slab_box:
            Bounding box of the slab in output coordinates. The input box is
            obtained by subtracting ``self.translation_offset_zyx``.

    Returns:
        None. Returns early (without writing anything) if the slab
        contains no bricks.

    Raises:
        RuntimeError: Any RuntimeError raised while constructing the input
            BrickWall, other than the "selects no blocks" case.
    """
    options = self.config["copysegmentation"]
    pyramid_depth = options["pyramid-depth"]

    input_slab_box = output_slab_box - self.translation_offset_zyx
    if self.sbm is None:
        slab_sbm = None
    else:
        slab_sbm = SparseBlockMask.create_from_sbm_box(self.sbm, input_slab_box)

    try:
        input_wall = BrickWall.from_volume_service(self.input_service,
                                                   0,
                                                   input_slab_box,
                                                   self.client,
                                                   self.target_partition_size_voxels,
                                                   sparse_block_mask=slab_sbm,
                                                   compression=options['brick-compression'])

        if input_wall.num_bricks == 0:
            logger.info(f"Slab: {slab_index}: No bricks to process. Skipping.")
            return
    except RuntimeError as ex:
        if "SparseBlockMask selects no blocks" in str(ex):
            return
        # Any other RuntimeError is a genuine failure. Swallowing it would
        # leave input_wall undefined and surface later as a confusing NameError.
        raise

    input_wall.persist_and_execute(f"Slab {slab_index}: Reading ({input_slab_box[:,::-1].tolist()})", logger)

    # Translate coordinates from input to output
    # (which will leave the bricks in a new, offset grid)
    # This has no effect on the brick volumes themselves.
    if any(self.translation_offset_zyx):
        input_wall = input_wall.translate(self.translation_offset_zyx)

    id_offset = options["add-offset-to-ids"]
    if id_offset != 0:
        def add_offset(brick):
            # Offset everything except for label 0, which remains 0
            vol = brick.volume.copy()
            brick.compress()
            vol[vol != 0] += id_offset
            return vol
        input_wall = input_wall.map_brick_volumes(add_offset)

    output_service = self.output_service

    # Pad internally to block-align to the OUTPUT alignment.
    # Here, we assume that any output labelmap (if any) is idempotent,
    # so it's okay to read pre-existing output data that will ultimately get remapped.
    padded_wall = self._consolidate_and_pad(slab_index, input_wall, 0, output_service)

    # Write scale 0 to DVID
    if not options["skip-scale-0-write"]:
        self._write_bricks(slab_index, padded_wall, 0, output_service)

    if options["compute-block-statistics"]:
        with Timer(f"Slab {slab_index}: Computing slab block statistics", logger):
            # The option is either True (use the output's block width)
            # or an explicit block shape.
            if options["compute-block-statistics"] is True:
                block_shape = 3 * [self.output_service.base_service.block_width]
            else:
                block_shape = options["compute-block-statistics"]

            def block_stats_for_brick(brick):
                vol = brick.volume
                brick.compress()
                return block_stats_for_volume(block_shape, vol, brick.physical_box)

            slab_block_stats_per_brick = padded_wall.bricks.map(block_stats_for_brick).compute()
            slab_block_stats_df = pd.concat(slab_block_stats_per_brick, ignore_index=True)
            del slab_block_stats_per_brick

        with Timer(f"Slab {slab_index}: Appending stats and overwriting stats file"):
            self._append_slab_statistics(slab_block_stats_df)

    for new_scale in range(1, 1 + pyramid_depth):
        if options["download-pre-downsampled"] and new_scale in self.input_service.available_scales:
            # The previous scale isn't needed; release it before downloading.
            del padded_wall
            downsampled_wall = BrickWall.from_volume_service(self.input_service,
                                                             new_scale,
                                                             input_slab_box,
                                                             self.client,
                                                             self.target_partition_size_voxels,
                                                             compression=options["brick-compression"])
            downsampled_wall.persist_and_execute(
                f"Slab {slab_index}: Scale {new_scale}: Downloading pre-downsampled bricks", logger)
        else:
            # Compute downsampled (results in smaller bricks)
            downsampled_wall = padded_wall.downsample((2, 2, 2), method=options["downsample-method"])
            downsampled_wall.persist_and_execute(
                f"Slab {slab_index}: Scale {new_scale}: Downsampling", logger)
            del padded_wall

        # Consolidate to full-size bricks and pad internally to block-align
        consolidated_wall = self._consolidate_and_pad(slab_index, downsampled_wall, new_scale, output_service)
        del downsampled_wall

        # Write to DVID
        self._write_bricks(slab_index, consolidated_wall, new_scale, output_service)

        # This scale becomes the source for the next one.
        padded_wall = consolidated_wall
        del consolidated_wall
    del padded_wall
def _init_masks(self):
    """
    Initialize the members that control which bricks and labels are copied.

    Sets:
        self.sbm:
            SparseBlockMask restricting which input bricks are read, or None.
            Taken from the pickled 'sparse-block-mask' file if configured.
            Otherwise derived from the input and/or output mask labels
            (as the intersection, if both are available).
        self.output_mask_labels:
            set of labels loaded from the 'output-mask-labels' option.
        self.input_mask_labels:
            set of labels loaded from the 'input-mask-labels' option,
            shifted by 'add-offset-to-ids' if that is non-zero.
    """
    options = self.config["copysegmentation"]

    self.sbm = None
    if options["sparse-block-mask"]:
        # In theory, we could just take the intersection of the masks involved.
        # But I'm too lazy to think about that right now.
        assert not options["input-mask-labels"] and not options["output-mask-labels"], \
            "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels"

        # NOTE(security): unpickling can execute arbitrary code.
        # Only point 'sparse-block-mask' at files from a trusted source.
        with open(options["sparse-block-mask"], 'rb') as f:
            self.sbm = pickle.load(f)

    is_supervoxels = False
    if isinstance(self.input_service.base_service, DvidVolumeService):
        is_supervoxels = self.input_service.base_service.supervoxels

    output_mask_labels = load_body_list(options["output-mask-labels"], is_supervoxels)
    self.output_mask_labels = set(output_mask_labels)

    output_sbm = None
    if len(output_mask_labels) > 0:
        # The output mask can only be used to prune input reads when the
        # input and output share the same brick grid and bounding box.
        if (self.output_service.preferred_message_shape != self.input_service.preferred_message_shape).any():
            logger.warning("Not using output mask to reduce data fetching: Your input service and output service don't have the same brick shape")
        elif (self.output_service.bounding_box_zyx != self.input_service.bounding_box_zyx).any():
            logger.warning("Not using output mask to reduce data fetching: Your input service and output service don't have the same bounding box")
        else:
            try:
                output_sbm = self.output_service.sparse_block_mask_for_labels(output_mask_labels)
            except NotImplementedError:
                output_sbm = None

    input_mask_labels = load_body_list(options["input-mask-labels"], is_supervoxels)

    input_sbm = None
    if len(input_mask_labels) > 0:
        try:
            input_sbm = self.input_service.sparse_block_mask_for_labels(input_mask_labels)
        except NotImplementedError:
            input_sbm = None

    if self.sbm is not None:
        # An explicit sparse-block-mask file takes precedence.
        pass
    elif input_sbm is None:
        self.sbm = output_sbm
    elif output_sbm is None:
        self.sbm = input_sbm
    else:
        assert (input_sbm.resolution == output_sbm.resolution).all(), \
            "FIXME: At the moment, you can't supply both an input mask and an output "\
            "mask unless the input and output sources use the same brick shape (message-block-shape)"

        final_box = box_intersection(input_sbm.box, output_sbm.box)

        # Express the intersection in each mask's own low-res array
        # coordinates: offset from that mask's box start, divided by the
        # resolution.
        input_box = (final_box - input_sbm.box[0]) // input_sbm.resolution
        input_mask = extract_subvol(input_sbm.lowres_mask, input_box)

        output_box = (final_box - output_sbm.box[0]) // output_sbm.resolution
        output_mask = extract_subvol(output_sbm.lowres_mask, output_box)

        assert input_mask.shape == output_mask.shape
        # The np.bool alias was removed from NumPy; the builtin compares equal to dtype('bool').
        assert input_mask.dtype == output_mask.dtype == bool
        final_mask = (input_mask & output_mask)

        self.sbm = SparseBlockMask(final_mask, final_box, input_sbm.resolution)

    id_offset = options["add-offset-to-ids"]
    if id_offset != 0:
        input_mask_labels = np.asarray(input_mask_labels, np.uint64)
        input_mask_labels += id_offset
    self.input_mask_labels = set(input_mask_labels)
def init_boxes(self, volume_service, subset_labels, roi):
    """
    Return the brick bounding boxes to process, clipped to the input volume.

    The boxes are restricted by a SparseBlockMask derived from the ROI
    (if given) or else from the subset labels (if any, and if the service
    supports sparse fetching). If neither yields a mask, the service's whole
    bounding box is tiled with its preferred message shape.

    Args:
        volume_service:
            The input volume service.
        subset_labels:
            Collection of labels to restrict the boxes to (may be empty).
        roi:
            Name of a DVID ROI, or a falsy value for no ROI.

    Returns:
        Array of boxes, indexed as ``boxes[i, 0, :]`` (start) and
        ``boxes[i, 1, :]`` (stop).
    """
    sbm = None

    # Needed by every path below. Previously this was assigned only inside
    # the ROI branch, so the subset-labels path failed with a NameError.
    brick_shape = volume_service.preferred_message_shape

    if roi:
        base_service = volume_service.base_service
        assert isinstance(base_service, DvidVolumeService), \
            "Can't specify an ROI unless you're using a dvid input"

        assert isinstance(volume_service, (ScaledVolumeService, DvidVolumeService)), \
            "The 'roi' option doesn't support adapters other than 'rescale-level'"

        scale = 0
        if isinstance(volume_service, ScaledVolumeService):
            scale = volume_service.scale_delta
            assert scale <= 5, \
                "The 'roi' option doesn't support volumes downscaled beyond level 5"

        server, uuid, _seg_instance = base_service.instance_triple

        # Width of one ROI (scale-5) voxel, measured in the service's own voxels.
        roi_voxel_width = 2**(5 - scale)

        assert not (brick_shape % roi_voxel_width).any(), \
            "If using an ROI, select a brick shape that is divisible by 32"

        seg_box = volume_service.bounding_box_zyx
        seg_box = round_box(seg_box, brick_shape)
        seg_box_s5 = seg_box // roi_voxel_width

        with Timer(f"Fetching mask for ROI '{roi}'", logger):
            roi_mask_s5, roi_box_s5 = fetch_roi(server, uuid, roi, format='mask')

        # Restrict to input bounding box
        clipped_roi_box_s5 = box_intersection(seg_box_s5, roi_box_s5)
        clipped_roi_mask_s5 = extract_subvol(roi_mask_s5, clipped_roi_box_s5 - roi_box_s5[0])

        # Align to brick grid.
        # The brick shape is in service voxels, so its size in ROI voxels is
        # brick_shape // 2**(5-scale). Dividing by 2**5 is only correct at scale 0.
        aligned_roi_box_s5 = round_box(clipped_roi_box_s5, brick_shape // roi_voxel_width, 'out')
        padding = (aligned_roi_box_s5 - clipped_roi_box_s5)
        padding[0] *= -1  # the start was rounded down, so flip the sign to get a pad width
        aligned_roi_mask_s5 = np.pad(clipped_roi_mask_s5, padding.transpose())

        # At the service native scale
        aligned_roi_box = (roi_voxel_width * aligned_roi_box_s5)
        logger.info(f"Brick-aligned ROI '{roi}' has bounding-box {aligned_roi_box[:, ::-1].tolist()}")

        # SBM 'full-res' corresponds to the input service voxels, not necessarily scale-0.
        sbm = SparseBlockMask.create_from_highres_mask(
            aligned_roi_mask_s5, roi_voxel_width, aligned_roi_box, brick_shape)

    elif subset_labels:
        try:
            sbm = volume_service.sparse_block_mask_for_labels([*subset_labels])
            if ((sbm.box[1] - sbm.box[0]) == 0).any():
                raise RuntimeError("Could not find sparse masks for any of the subset-labels")
        except NotImplementedError:
            # The service can't fetch sparsely; fall back to dense boxes.
            sbm = None

    if sbm is None:
        boxes = boxes_from_grid(volume_service.bounding_box_zyx, brick_shape, clipped=True)
        return np.array([*boxes])

    boxes = sbm.sparse_boxes(brick_shape)
    boxes = np.array(boxes)

    # Clip
    boxes[:, 0, :] = np.maximum(volume_service.bounding_box_zyx[0], boxes[:, 0, :])
    boxes[:, 1, :] = np.minimum(volume_service.bounding_box_zyx[1], boxes[:, 1, :])

    assert (boxes[:, 0, :] < boxes[:, 1, :]).all(), \
        "After cropping to input volume, some bricks disappeared."

    return boxes
def main():
    """
    Command-line entry point: paste each sparsevol file into a labelmap
    instance under a fresh label, optionally restricted to (or excluding) an ROI.

    For every input file, a JSON file with the same base name is written next
    to it, recording the new label and the labels that were overwritten.
    """
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--no-downres', action='store_true')
    parser.add_argument('--only-within-roi')
    parser.add_argument('--not-within-roi')
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('sparsevol_files', nargs='+')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    # Validate with parser.error() rather than assert, which is stripped
    # under `python -O` and would let conflicting options through silently.
    if args.only_within_roi and args.not_within_roi:
        parser.error("Can't supply both --only-within-roi and --not-within-roi. "
                     "Pick one or the other (or neither).")

    roi = args.only_within_roi or args.not_within_roi
    invert_roi = (args.not_within_roi is not None)

    if roi:
        roi_mask, mask_box = fetch_roi(args.dvid_server, args.uuid, roi, format='mask')
        roi_sbm = SparseBlockMask(roi_mask, mask_box * (2**5), 2**5)  # ROIs are provided at scale 5
    else:
        roi_sbm = None

    # Ideally, we would choose the max label for the node we're writing to,
    # but the /maxlabel endpoint doesn't work for all nodes
    # instead, we'll use the repo-wide maxlabel from the /info JSON.
    #maxlabel = fetch_maxlabel(args.dvid_server, args.uuid, args.labelmap_instance)
    maxlabel = fetch_instance_info(args.dvid_server, args.uuid,
                                   args.labelmap_instance)["Extended"]["MaxRepoLabel"]

    for i, path in enumerate(args.sparsevol_files):
        # Each file gets the next unused label.
        maxlabel += 1
        name = os.path.basename(path)
        prefix_logger = PrefixedLogger(logger, f"Vol #{i:02d} {name}: ")

        with Timer(f"Pasting {name} as {maxlabel}", logger):
            overwritten_labels = overwrite_sparsevol(*instance_info, maxlabel, path, roi_sbm,
                                                     invert_roi, args.no_downres, prefix_logger)

        results_path = os.path.splitext(path)[0] + '.json'
        with open(results_path, 'w') as f:
            results = {
                'new-label': maxlabel,
                'overwritten_labels': sorted(overwritten_labels)
            }
            json.dump(results, f, indent=2, cls=NumpyConvertingEncoder)

    logger.info("Done.")
def execute(self):
    """
    Sample the input volume at every point of a CSV coordinate table and
    export the table with the sampled values appended as a new column.

    Configuration read from ``self.config``:
      - ``"input"``: passed to ``VolumeService.create_from_config()``.
      - ``"resource-manager"``: ``"server"`` and ``"port"`` for the
        ``ResourceManagerClient``.
      - ``"samplepoints"``: ``"input-table"`` (CSV with 'z', 'y', 'x'
        columns), ``"rescale-points-to-level"`` (points are floor-divided
        by ``2**level`` before sampling), ``"output-column"`` and
        ``"output-table"``.

    The exported table is sorted by the original (scale-0) z, y, x columns.

    Raises:
        RuntimeError: if any (rescaled) point lies outside of the volume
            service's bounding box.
    """
    self._sanitize_config()

    input_config = self.config["input"]
    options = self.config["samplepoints"]
    resource_config = self.config["resource-manager"]

    resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"])
    volume_service = VolumeService.create_from_config(input_config, resource_mgr_client)

    input_csv = options["input-table"]
    with Timer(f"Reading {input_csv}", logger):
        coordinate_table_df = pd.read_csv(input_csv, header=0, dtype=CSV_TYPES)
        points = coordinate_table_df[['z', 'y', 'x']].values

    rescale = options["rescale-points-to-level"]
    if rescale != 0:
        # Deliberately NOT in-place: the unscaled z/y/x values stored in
        # coordinate_table_df are needed again below, so they must never be
        # altered, whether `.values` handed back a view or a copy.
        points = points // (2**rescale)

    # All points must lie within the input volume.
    # If clipping the points' bounding box to the volume changes ANY of its
    # coordinates, then at least one point lies outside of the volume.
    points_box = [points.min(axis=0), 1+points.max(axis=0)]
    if (box_intersection(points_box, volume_service.bounding_box_zyx) != points_box).any():
        raise RuntimeError("The point list includes points outside of the volume bounding box.")

    with Timer("Sorting points by Brick ID", logger):
        # 'Brick ID' is defined as the divided corner coordinate
        brick_shape = volume_service.preferred_message_shape
        brick_ids_and_points = np.concatenate( (points // brick_shape, points), axis=1 )
        brick_ids_and_points = lexsort_columns(brick_ids_and_points)

        brick_ids = brick_ids_and_points[:, :3]
        points = brick_ids_and_points[:, 3:]

        # Extract the first row of each group to get the set of unique brick IDs
        point_group_spans = groupby_spans_presorted(brick_ids)
        point_group_starts = (start for start, stop in point_group_spans)
        unique_brick_ids = brick_ids[np.fromiter(point_group_starts, np.int32)]

    with Timer("Constructing sparse mask", logger):
        # BrickWall.from_volume_service() supports the ability to initialize a sparse RDD,
        # with only a subset of Bricks (rather than a dense RDD containing every brick
        # within the volume bounding box).
        # It requires a SparseBlockMask object indicating exactly which Bricks need to be fetched.
        brick_mask_box = np.array([unique_brick_ids.min(axis=0), 1+unique_brick_ids.max(axis=0)])
        brick_mask_shape = (brick_mask_box[1] - brick_mask_box[0])

        # One mask voxel per brick; mark exactly the bricks that contain points.
        brick_mask = np.zeros(brick_mask_shape, bool)
        brick_mask_coords = unique_brick_ids - brick_mask_box[0]
        brick_mask[tuple(brick_mask_coords.transpose())] = True
        sbm = SparseBlockMask(brick_mask, brick_mask_box*brick_shape, brick_shape)

    with Timer("Initializing BrickWall", logger):
        # Aim for 2 GB RDD partitions when loading segmentation
        GB = 2**30
        target_partition_size_voxels = 2 * GB // np.uint64().nbytes
        brickwall = BrickWall.from_volume_service(volume_service, 0, None, self.client, target_partition_size_voxels, 0, sbm, lazy=True)

    with Timer(f"Grouping {len(points)} points", logger):
        # This is faster than pandas.DataFrame.groupby() for large data
        point_groups = groupby_presorted(points, brick_ids)
        id_and_ptgroups = list(zip(unique_brick_ids, point_groups))
        num_groups = len(id_and_ptgroups)

    with Timer(f"Join {num_groups} point groups with bricks", logger):
        id_and_ptgroups = dask.bag.from_sequence( id_and_ptgroups,
                                                  npartitions=brickwall.bricks.npartitions )

        # Flatten (brick_id, pointgroup) into (z, y, x, pointgroup) rows.
        id_and_ptgroups = id_and_ptgroups.map(lambda i_p: (*i_p[0], i_p[1]))
        id_and_ptgroups_df = id_and_ptgroups.to_dataframe(columns=['z', 'y', 'x', 'pointgroup'])

        # Same row layout for the bricks: (z, y, x, brick)
        ids_and_bricks = brickwall.bricks.map(lambda brick: (*(brick.logical_box[0] // brick_shape), brick))
        ids_and_bricks_df = ids_and_bricks.to_dataframe(columns=['z', 'y', 'x', 'brick'])

        def set_brick_id_index(df):
            """
            Add a 'brick_id' column (the z,y,x columns encoded into a
            single uint64) and make it the index of the dask dataframe.
            """
            def set_brick_id(df):
                df['brick_id'] = encode_coords_to_uint64( df[['z', 'y', 'x']].values.astype(np.int32) )
                return df

            # Create the column up front so `df` can serve as the `meta`.
            df['brick_id'] = np.uint64(0)
            df = df.map_partitions(set_brick_id, meta=df)

            # Note: bricks and pointgroups are already sorted by
            # brick scan-order so, brick_id is already sorted.
            # Specifying sorted=True is critical to performance here.
            df = df.set_index('brick_id', sorted=True)
            return df

        # Give them matching indexes
        ids_and_bricks_df = set_brick_id_index(ids_and_bricks_df)
        id_and_ptgroups_df = set_brick_id_index(id_and_ptgroups_df)

        # Join (index-on-index, so it should be fast)
        ptgroup_and_brick_df = id_and_ptgroups_df.merge( ids_and_bricks_df,
                                                         how='left',
                                                         left_index=True,
                                                         right_index=True )
        ptgroup_and_brick_df = ptgroup_and_brick_df[['pointgroup', 'brick']]
        ptgroup_and_brick = ptgroup_and_brick_df.to_bag()

    # Persist and force computation before proceeding.
    #ptgroup_and_brick = persist_and_execute(ptgroup_and_brick, "Persisting joined point groups", logger, False)
    #assert ptgroup_and_brick.count().compute() == num_groups == brickwall.num_bricks

    def sample_points(points_and_brick):
        """
        Given a Brick and array of points (N,3) that lie within it,
        sample labels from the points within the brick and return
        a record array containing the points and the sampled labels.
        """
        points, brick = points_and_brick

        result_dtype = [('z', np.int32), ('y', np.int32), ('x', np.int32), ('label', np.uint64)]
        result = np.zeros((len(points),), result_dtype)
        result['z'] = points[:,0]
        result['y'] = points[:,1]
        result['x'] = points[:,2]

        # Make relative to brick offset.
        # Computed into a new array so the incoming point group is not
        # modified as a side effect of sampling.
        relative_points = points - brick.physical_box[0]

        result['label'] = brick.volume[tuple(relative_points.transpose())]
        return result

    with Timer("Sampling bricks", logger):
        brick_samples = ptgroup_and_brick.map(sample_points).compute()

    with Timer("Concatenating samples", logger):
        sample_table = np.concatenate(brick_samples)

    with Timer("Sorting samples", logger):
        # This will sort in terms of the SCALED z,y,x coordinates
        sample_table.sort()

    with Timer("Sorting table", logger):
        if rescale == 0:
            coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)
        else:
            # sample_table is sorted by RESCALED coordinate,
            # so sort our table the same way
            coordinate_table_df['rz'] = coordinate_table_df['z'] // (2**rescale)
            coordinate_table_df['ry'] = coordinate_table_df['y'] // (2**rescale)
            coordinate_table_df['rx'] = coordinate_table_df['x'] // (2**rescale)
            coordinate_table_df.sort_values(['rz', 'ry', 'rx'], inplace=True)
            del coordinate_table_df['rz']
            del coordinate_table_df['ry']
            del coordinate_table_df['rx']

    # Now that samples and input rows are sorted identically,
    # append the results
    output_col = options["output-column"]
    coordinate_table_df[output_col] = sample_table['label'].copy()

    if rescale != 0:
        with Timer("Re-sorting table at scale 0", logger):
            # For simplicity (API and testing), we guarantee that coordinates are sorted in the output.
            # In the case of rescaled points, they need to be sorted once more (at scale 0 this time)
            coordinate_table_df.sort_values(['z', 'y', 'x'], inplace=True)

    with Timer("Exporting samples", logger):
        coordinate_table_df.to_csv(options["output-table"], header=True, index=False)

    logger.info("DONE.")