def test_dump_config(): data = {'a': flow_style([1, 2, 3])} dumped = dump_config(data) expected = textwrap.dedent("""\ a: [1, 2, 3] """) assert dumped == expected
def test_emit_defaults_with_flow_style(): schema = copy.deepcopy(TEST_SCHEMA) d = schema['properties']['myobject']['default'] schema['properties']['myobject']['default'] = flow_style(d) defaults = emit_defaults(schema) assert defaults['myobject'].fa.flow_style() # Make sure defaults still validate # (despite being yaml CommentedMap or whatever) validate(defaults, schema)
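# Illustrative sketch (not an additional test) of why flow_style() is used throughout
# the schemas below: sequences wrapped with flow_style() are emitted inline by
# dump_config(), while plain lists are emitted in block style. This assumes
# dump_config() returns the YAML text when no output file is given, as exercised
# in test_dump_config() above.
def example_flow_style_emission():
    from confiddler import dump_config, flow_style
    block_yaml = dump_config({'shape': [128, 128, 128]})
    # block_yaml looks like:
    #   shape:
    #   - 128
    #   - 128
    #   - 128
    inline_yaml = dump_config({'shape': flow_style([128, 128, 128])})
    # inline_yaml looks like:
    #   shape: [128, 128, 128]
    return block_yaml, inline_yaml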
class CopySegmentation(Workflow): """ Workflow to copy segmentation from one source (e.g. a DVID segmentation instance or a BrainMaps volume) into a DVID segmentation instance. Notes: - The data is written to DVID in block-aligned 'bricks'. If the source data is not block-aligned at the edges, pre-existing data (if any) is read from the DVID destination to fill out ('pad') the bricks until they are completely block aligned. - The data is also downsampled into a multi-scale pyramid and uploaded. - The volume is processed in Z-slabs. To avoid complications during downsampling, the Z-slabs must be aligned to a multiple of the DVID block shape, which may be rather large, depending on the highest scale of the pyramid. (It is recommended that you don't set this explicitly in the config, so a suitable default can be chosen for you.) - This workflow uses DvidVolumeService to write the segmentation blocks, which is able to send them to DVID in the pre-encoded 'labelarray' or 'labelmap' block format. This saves CPU resources on the DVID server. - As a convenience, size of each label 'body' in the copied volume is also calculated and exported in an HDF5 file, sorted by body size. """ OptionsSchema = { "type": "object", "additionalProperties": False, "default": {}, "properties": { "block-statistics-file": { "description": "Where to store block statistics for the INPUT segmentation\n" "(but translated to output coordinates).\n" "If the file already exists, it will be appended to (for restarting from a failed job).\n" "Supported formats: .csv and .h5", "type": "string", "default": "block-statistics.h5" }, "compute-block-statistics": { "description": "Whether or not to compute block statistics (from the scale 0 data).\n" "Usually you'll need the statistics file to load labelindexes after copying the voxels,\n" "but in some cases you might not need them (e.g. adding pyramids after ingesting only scale 0).\n" "By default, the block shape will be chosen according to the output volume,\n" "but you can provide a custom shape here.\n", "oneOf": [{ "type": "boolean" }, { "type": "array", "items": { "type": "integer" }, "minItems": 3, "maxItems": 3, "default": flow_style([-1, -1, -1]) }], "default": True }, "pyramid-depth": { "description": "Number of pyramid levels to generate \n" "(-1 means choose automatically, 0 means no pyramid)", "type": "integer", "default": -1 # automatic by default }, "permit-inconsistent-pyramid": { "description": "Normally overwriting a pre-existing data instance is\n" "an error unless you rewrite ALL of its pyramid levels,\n" "but this setting allows you to override that error.\n" "(You had better know what you're doing...)\n", "type": "boolean", "default": False }, "skip-scale-0-write": { "description": "Skip writing scale 0. Useful if scale 0 is already downloaded and now\n" "you just want to generate the rest of the pyramid to the same instance.\n", "type": "boolean", "default": False }, "download-pre-downsampled": { "description": "Instead of downsampling the data, just download the pyramid from the server (if it's available).\n" "Will not work unless you add the 'available-scales' setting to the input service's geometry config.", "type": "boolean", "default": False }, "downsample-method": { "description": "Which downsampling method to use for label volume downsampling.\n", "type": "string", "enum": DOWNSAMPLE_METHODS, # FIXME: This not the fastest method, but the fastest method was # observed to segfault in one conda environment. # Need to investigate! 
"default": "labels-numba" }, "brick-compression": { "description": "Internally, downloaded bricks will be stored in a compressed format.\n" "This setting specifies the compression scheme to use.\n" f"Options: {COMPRESSION_METHODS}" "Note: This affects only in-memory storage while the workflow is running.\n" " It does NOT affect the compression used in DVID.\n", "type": "string", "enum": COMPRESSION_METHODS, "default": "lz4_2x" }, "write-empty-blocks": { "description": "If a copied block would be completely empty, it can be skipped\n" "if you're writing to a brand new volume.\n" "By default, we don't bother writing such blocks.\n" "Set this to True if you want to overwrite existing blocks with empty ones.", "type": "boolean", "default": False, }, "dont-overwrite-identical-blocks": { "description": "Before writing each block, read the existing segmentation from DVID\n" "and check to see if it already matches what will be written.\n" "If our write would be a no-op, don't write it.\n", "type": "boolean", "default": False }, "slab-depth": { "description": "The data is downloaded and processed in Z-slabs.\n" "This setting determines how thick each Z-slab is.\n" "Should be a multiple of (block_width * 2**pyramid_depth) to ensure slabs\n" "are completely independent, even after downsampling.\n", "type": "integer", "default": -1 # Choose automatically: block_width * 2**pyramid_depth }, "delay-minutes-between-slabs": { "description": "Optionally introduce an artificial pause after finishing one slab before starting the next,\n" "to give DVID time to index the blocks we've sent so far.\n" "Should not be necessary for most use-cases.", "type": "integer", "default": 0, }, "sparse-block-mask": { "description": "Optionally provide a mask which limits the set of bricks to be processed.\n" "If you already have a map of where the valid data is, you can provide a\n" "pickled SparseBlockMask here.\n", "type": "string", "default": "" }, "input-mask-labels": { **BodyListSchema, "description": "If provided, only voxels under the given input labels in the output will be modified.\n" "Others will remain untouched.\n", }, "output-mask-labels": { **BodyListSchema, "description": "If provided, only voxels under the given labels in the output will be modified.\n" "Others will remain untouched.\n" "Note: At the time of this writing, the output mask is NOT used to enable sparse-fetching from DVID.\n" " Only the input mask is used for that, so if you're using an output mask without an input mask,\n" " you'll still fetch the entire input volume, even if most of it will be written unchanged!\n", }, "skip-masking-step": { "description": "When using an input mask, normally the entire output block must be fetched so it can be combined with the input.\n" "but if you know you're writing to an empty volume, or if the output happens to match the input\n" "(e.g. 
if you are recomputing pyramids from an existing scale-0 segmentation),\n" "then you may save time by skipping the fetch from the output.\n" "In this case, input-mask-labels are used to determine which blocks to copy,\n" "but not which voxels to copy -- all voxels in each block are directly written to the output.\n" "Note: The input will still be PADDED from the output if necessary to achieve block alignment.\n", "type": "boolean", "default": False }, "add-offset-to-ids": { "description": "If desired, add a constant offset to all input IDs before they are written to the output.", "type": "integer", "default": 0 } } } Schema = copy.deepcopy(Workflow.schema()) Schema["properties"].update({ "input": SegmentationVolumeSchema, "output": SegmentationVolumeSchema, "copysegmentation": OptionsSchema }) @classmethod def schema(cls): return CopySegmentation.Schema def execute(self): self._init_services() self._init_masks() self._log_neuroglancer_links() self._sanitize_config() # Aim for 2 GB RDD partitions when loading segmentation GB = 2**30 self.target_partition_size_voxels = 2 * GB // np.uint64().nbytes # (See note in _init_services() regarding output bounding boxes) input_bb_zyx = self.input_service.bounding_box_zyx output_bb_zyx = self.output_service.bounding_box_zyx self.translation_offset_zyx = output_bb_zyx[0] - input_bb_zyx[0] if self.translation_offset_zyx.any(): logger.info( f"Translation offset is {self.translation_offset_zyx[:, ::-1].tolist()}" ) pyramid_depth = self.config["copysegmentation"]["pyramid-depth"] slab_depth = self.config["copysegmentation"]["slab-depth"] # Process data in Z-slabs output_slab_boxes = list(slabs_from_box(output_bb_zyx, slab_depth)) max_depth = max( map(lambda box: box[1][0] - box[0][0], output_slab_boxes)) logger.info( f"Processing data in {len(output_slab_boxes)} slabs (max depth={max_depth}) for {pyramid_depth} pyramid levels" ) if self.config["copysegmentation"]["compute-block-statistics"]: self._init_stats_file() # Read data and accumulate statistics, one slab at a time. for slab_index, output_slab_box in enumerate(output_slab_boxes): with Timer() as timer: self._process_slab(slab_index, output_slab_box) logger.info( f"Slab {slab_index}: Total processing time: {timer.timedelta}") delay_minutes = self.config["copysegmentation"][ "delay-minutes-between-slabs"] if delay_minutes > 0 and slab_index != len(output_slab_boxes) - 1: logger.info( f"Delaying {delay_minutes} before continuing to next slab..." ) time.sleep(delay_minutes * 60) logger.info(f"DONE copying/downsampling all slabs") def _init_services(self): """ Initialize the input and output services, and fill in 'auto' config values as needed. Also check the service configurations for errors. 
""" input_config = self.config["input"] output_config = self.config["output"] mgr_options = self.config["resource-manager"] options = self.config["copysegmentation"] slab_depth = options["slab-depth"] pyramid_depth = options["pyramid-depth"] permit_inconsistent_pyramids = options["permit-inconsistent-pyramid"] self.mgr_client = ResourceManagerClient(mgr_options["server"], mgr_options["port"]) self.input_service = VolumeService.create_from_config( input_config, self.mgr_client) brick_shape = self.input_service.preferred_message_shape if slab_depth % brick_shape[0] != 0: self.input_service.preferred_message_shape[0] logger.warning( f"Your slab-depth {slab_depth} is not a multiple of the input's brick width {brick_shape[0]}" ) if isinstance(self.input_service.base_service, DvidVolumeService): assert input_config["dvid"]["supervoxels"], \ 'DVID input service config must use "supervoxels: true"' # Special handling for creation of multi-scale outputs: # auto-configure the pyramid depths multiscale_output_type = None for t in ["dvid", "n5", "zarr"]: if t in output_config and not hasattr(output_config[t], 'from_default'): multiscale_output_type = t if multiscale_output_type: out_fmt = multiscale_output_type if output_config[out_fmt]["create-if-necessary"]: if self.config["copysegmentation"][ "skip-scale-0-write"] and pyramid_depth == 0: # Nothing to write. Maybe the user is just computing block statistics. msg = ( "Since your config specifies no pyramid levels to write, no output instance will be created. " "Avoid this warning by removing 'create-if-necessary' from your config" ) logger.warning(msg) output_config[out_fmt]["create-if-necessary"] = False else: max_scale = output_config[out_fmt]["creation-settings"][ "max-scale"] if max_scale not in (-1, pyramid_depth): msg = ( f"Inconsistent max-scale ({max_scale}) and pyramid-depth ({pyramid_depth}). " "Omit max-scale from your creation-settings.") raise RuntimeError(msg) output_config[out_fmt]["creation-settings"][ "max-scale"] = pyramid_depth # Replace 'auto' dimensions with input bounding box replace_default_entries(output_config["geometry"]["bounding-box"], self.input_service.bounding_box_zyx[:, ::-1]) self.output_service = VolumeService.create_from_config( output_config, self.mgr_client) output_service = self.output_service assert isinstance(output_service, VolumeServiceWriter) if "dvid" in output_config: assert output_config["dvid"]["supervoxels"], \ 'DVID output service config must use "supervoxels: true"' if output_service.instance_name in fetch_repo_instances( output_service.server, output_service.uuid): existing_depth = self._read_pyramid_depth() if pyramid_depth not in ( -1, existing_depth) and not permit_inconsistent_pyramids: raise Exception( f"Can't set pyramid-depth to {pyramid_depth}: " f"Data instance '{output_service.instance_name}' already existed, with depth {existing_depth}" ) # These services aren't supported because we copied some geometry (bounding-box) # directly from the input service. assert not isinstance(output_service, TransposedVolumeService) assert not isinstance( output_service, ScaledVolumeService) or output_service.scale_delta == 0 if isinstance(self.output_service.base_service, DvidVolumeService): assert output_service.base_service.disable_indexing, \ "During ingestion, dvid labelmap indexing should be disabled.\n" \ "Please add 'disable-indexing: true' to your output dvid config." 
logger.info( f"Output bounding box (xyz) is: {output_service.bounding_box_zyx[:,::-1].tolist()}" ) input_shape = -np.subtract(*self.input_service.bounding_box_zyx) output_shape = -np.subtract(*output_service.bounding_box_zyx) assert not any(np.array(output_service.preferred_message_shape) % output_service.block_width), \ "Output message-block-shape should be a multiple of the block size in all dimensions." assert (input_shape == output_shape).all(), \ "Input bounding box and output bounding box do not have the same dimensions" if ("apply-labelmap" in output_config["adapters"]) and ( output_config["adapters"]["apply-labelmap"]["file-type"] != "__invalid__"): assert output_config["adapters"]["apply-labelmap"]["apply-when"] == "reading-and-writing", \ "Labelmap will be applied to voxels during pre-write and post-read (due to block padding).\n"\ "You cannot use this workflow with non-idempotent labelmaps, unless your data is already perfectly block aligned." def _init_masks(self): options = self.config["copysegmentation"] self.sbm = None if options["sparse-block-mask"]: # In theory, we could just take the intersection of the masks involved. # But I'm too lazy to think about that right now. assert not options["input-mask-labels"] and not options["output-mask-labels"], \ "Not Implemented: Can't use sparse-block-mask in conjunction with input-mask-labels or output-mask-labels" with open(options["sparse-block-mask"], 'rb') as f: self.sbm = pickle.load(f) is_supervoxels = False if isinstance(self.input_service.base_service, DvidVolumeService): is_supervoxels = self.input_service.base_service.supervoxels output_mask_labels = load_body_list(options["output-mask-labels"], is_supervoxels) self.output_mask_labels = set(output_mask_labels) output_sbm = None if len(output_mask_labels) > 0: if (self.output_service.preferred_message_shape != self.input_service.preferred_message_shape).any(): logger.warn( "Not using output mask to reduce data fetching: Your input service and output service don't have the same brick shape" ) elif (self.output_service.bounding_box_zyx != self.input_service.bounding_box_zyx).any(): logger.warn( "Not using output mask to reduce data fetching: Your input service and output service don't have the same bounding box" ) else: try: output_sbm = self.output_service.sparse_block_mask_for_labels( output_mask_labels) except NotImplementedError: output_sbm = None input_mask_labels = load_body_list(options["input-mask-labels"], is_supervoxels) input_sbm = None if len(input_mask_labels) > 0: try: input_sbm = self.input_service.sparse_block_mask_for_labels( input_mask_labels) except NotImplementedError: input_sbm = None if self.sbm is not None: pass elif input_sbm is None: self.sbm = output_sbm elif output_sbm is None: self.sbm = input_sbm else: assert (input_sbm.resolution == output_sbm.resolution).all(), \ "FIXME: At the moment, you can't supply both an input mask and an output "\ "mask unless the input and output sources use the same brick shape (message-block-shape)" final_box = box_intersection(input_sbm.box, output_sbm.box) input_box = (input_sbm.box - final_box) // input_sbm.resolution input_mask = extract_subvol(input_sbm.lowres_mask, input_box) output_box = (output_sbm - final_box) // output_sbm.resolution output_mask = extract_subvol(output_sbm.lowres_mask, output_box) assert input_mask.shape == output_mask.shape assert input_mask.dtype == output_mask.dtype == np.bool final_mask = (input_mask & output_mask) self.sbm = SparseBlockMask(final_mask, final_box, input_sbm.resolution) 
id_offset = options["add-offset-to-ids"] if id_offset != 0: input_mask_labels = np.asarray(input_mask_labels, np.uint64) input_mask_labels += id_offset self.input_mask_labels = set(input_mask_labels) def _read_pyramid_depth(self): """ Read the MaxDownresLevel from the output instance we'll be writing to, and verify that it matches our config for pyramid-depth. """ info = fetch_instance_info(*self.output_service.instance_triple) existing_depth = int(info["Extended"]["MaxDownresLevel"]) return existing_depth def _log_neuroglancer_links(self): """ Write a link to the log file for viewing the segmentation data after it is ingested. We assume that the output server is hosting neuroglancer at http://<server>:<port>/neuroglancer/ """ if not isinstance(self.output_service.base_service, DvidVolumeService): return output_service = self.output_service server = output_service.base_service.server uuid = output_service.base_service.uuid instance = output_service.base_service.instance_name output_box_xyz = np.array(output_service.bounding_box_zyx[:, ::-1]) output_center_xyz = (output_box_xyz[0] + output_box_xyz[1]) / 2 link_prefix = f"{server}/neuroglancer/#!" link_json = \ { "layers": { "segmentation": { "type": "segmentation", "source": f"dvid://{server}/{uuid}/{instance}" } }, "navigation": { "pose": { "position": { "voxelSize": [8,8,8], "voxelCoordinates": output_center_xyz.tolist() } }, "zoomFactor": 8 } } logger.info( f"Neuroglancer link to output: {link_prefix}{json.dumps(link_json)}" ) def _sanitize_config(self): """ Replace a few config values with reasonable defaults if necessary. (Note: Must be called AFTER services and output instances have been initialized.) """ options = self.config["copysegmentation"] # Overwrite pyramid depth in our config (in case the user specified -1, i.e. automatic) if options["pyramid-depth"] == -1: options["pyramid-depth"] = self._read_pyramid_depth() pyramid_depth = options["pyramid-depth"] block_width = self.output_service.block_width slab_depth = options["slab-depth"] if slab_depth == -1: slab_depth = block_width * 2**pyramid_depth options["slab-depth"] = slab_depth if (options["download-pre-downsampled"] and (options["input-mask-labels"] or options["output-mask-labels"])): # TODO: This restriction could be lifted if we also used the mask when fetching # the downscale pyramids, but that's not yet implemented. Even if you're # using 'skip-masking-step', the lowres pyramids are a problem. raise RuntimeError( "You aren't allowed to use download-pre-downsampled if you're using a mask." ) if options["skip-scale-0-write"] and pyramid_depth == 0 and not options[ "compute-block-statistics"]: raise RuntimeError( "According to your config, you aren't computing block stats, " "you aren't writing scale 0, and you aren't writing pyramids. " "What exactly are you hoping will happen here?") if options["skip-masking-step"] and options["output-mask-labels"]: logger.warning( "You specified output-mask-labels but also skip-masking-step. That's usually a mistake!"
) def _init_stats_file(self): stats_path = self.config["copysegmentation"]["block-statistics-file"] if os.path.exists(stats_path): logger.info(f"Block statistics already exists: {stats_path}") logger.info(f"Will APPEND to the pre-existing statistics file.") return if stats_path.endswith('.csv'): # Initialize (just the header) template_df = pd.DataFrame(columns=list(BLOCK_STATS_DTYPES.keys())) template_df.to_csv(stats_path, index=False, header=True) elif stats_path.endswith('.h5'): # Initialize a 0-entry 1D array with the correct (structured) dtype with h5py.File(stats_path, 'w') as f: f.create_dataset('stats', shape=(0, ), maxshape=(None, ), chunks=True, dtype=list(BLOCK_STATS_DTYPES.items())) else: raise RuntimeError(f"Unknown file format: {stats_path}") def _append_slab_statistics(self, slab_stats_df): """ Append the rows of the given slab statistics DataFrame to the output statistics file. No attempt is made to drop duplicate rows (e.g. if you started from pre-existing statistics and the new bounding-box overlaps with the previous run's). Args: slab_stats_df: DataFrame to be appended to the stats file, with columns and dtypes matching BLOCK_STATS_DTYPES """ assert list(slab_stats_df.columns) == list(BLOCK_STATS_DTYPES.keys()) stats_path = self.config["copysegmentation"]["block-statistics-file"] if stats_path.endswith('.csv'): slab_stats_df.to_csv(stats_path, header=False, index=False, mode='a') elif stats_path.endswith('.h5'): with h5py.File(stats_path, 'a') as f: orig_len = len(f['stats']) new_len = orig_len + len(slab_stats_df) f['stats'].resize((new_len, )) f['stats'][orig_len:new_len] = slab_stats_df.to_records() else: raise RuntimeError(f"Unknown file format: {stats_path}") def _process_slab(self, slab_index, output_slab_box): """ (The main work of this file.) Process a large slab of voxels: 1. Read a 'slab' of bricks from the input as a BrickWall 2. Translate it to the output coordinates. 3. Splice & group the bricks so that they are aligned to the optimal output grid 4. 'Pad' the bricks on the edges of the wall by *reading* data from the output destination, so that all bricks are complete (i.e. they completely fill their grid block). 5. Write all bricks to the output destination. 6. Downsample the bricks and repeat steps 3-5 for the downsampled scale. """ options = self.config["copysegmentation"] pyramid_depth = options["pyramid-depth"] input_slab_box = output_slab_box - self.translation_offset_zyx if self.sbm is None: slab_sbm = None else: slab_sbm = SparseBlockMask.create_from_sbm_box( self.sbm, input_slab_box) try: input_wall = BrickWall.from_volume_service( self.input_service, 0, input_slab_box, self.client, self.target_partition_size_voxels, sparse_block_mask=slab_sbm, compression=options['brick-compression']) if input_wall.num_bricks == 0: logger.info( f"Slab: {slab_index}: No bricks to process. Skipping.") return except RuntimeError as ex: if "SparseBlockMask selects no blocks" in str(ex): return input_wall.persist_and_execute( f"Slab {slab_index}: Reading ({input_slab_box[:,::-1].tolist()})", logger) # Translate coordinates from input to output # (which will leave the bricks in a new, offset grid) # This has no effect on the brick volumes themselves. 
if any(self.translation_offset_zyx): input_wall = input_wall.translate(self.translation_offset_zyx) id_offset = options["add-offset-to-ids"] if id_offset != 0: def add_offset(brick): # Offset everything except for label 0, which remains 0 vol = brick.volume.copy() brick.compress() vol[vol != 0] += id_offset return vol input_wall = input_wall.map_brick_volumes(add_offset) output_service = self.output_service # Pad internally to block-align to the OUTPUT alignment. # Here, we assume that any output labelmap (if any) is idempotent, # so it's okay to read pre-existing output data that will ultimately get remapped. padded_wall = self._consolidate_and_pad(slab_index, input_wall, 0, output_service) # Write scale 0 to DVID if not options["skip-scale-0-write"]: self._write_bricks(slab_index, padded_wall, 0, output_service) if options["compute-block-statistics"]: with Timer(f"Slab {slab_index}: Computing slab block statistics", logger): if options["compute-block-statistics"] is True: block_shape = 3 * [ self.output_service.base_service.block_width ] else: block_shape = options["compute-block-statistics"] def block_stats_for_brick(brick): vol = brick.volume brick.compress() return block_stats_for_volume(block_shape, vol, brick.physical_box) slab_block_stats_per_brick = padded_wall.bricks.map( block_stats_for_brick).compute() slab_block_stats_df = pd.concat(slab_block_stats_per_brick, ignore_index=True) del slab_block_stats_per_brick with Timer( f"Slab {slab_index}: Appending stats and overwriting stats file" ): self._append_slab_statistics(slab_block_stats_df) for new_scale in range(1, 1 + pyramid_depth): if options[ "download-pre-downsampled"] and new_scale in self.input_service.available_scales: del padded_wall downsampled_wall = BrickWall.from_volume_service( self.input_service, new_scale, input_slab_box, self.client, self.target_partition_size_voxels, compression=options["brick-compression"]) downsampled_wall.persist_and_execute( f"Slab {slab_index}: Scale {new_scale}: Downloading pre-downsampled bricks", logger) else: # Compute downsampled (results in smaller bricks) downsampled_wall = padded_wall.downsample( (2, 2, 2), method=options["downsample-method"]) downsampled_wall.persist_and_execute( f"Slab {slab_index}: Scale {new_scale}: Downsampling", logger) del padded_wall # Consolidate to full-size bricks and pad internally to block-align consolidated_wall = self._consolidate_and_pad( slab_index, downsampled_wall, new_scale, output_service) del downsampled_wall # Write to DVID self._write_bricks(slab_index, consolidated_wall, new_scale, output_service) padded_wall = consolidated_wall del consolidated_wall del padded_wall def _consolidate_and_pad(self, slab_index, input_wall, scale, output_service): """ Consolidate (align), and pad the given BrickWall Args: scale: The pyramid scale of the data. output_service: The output_service to align to and pad from Returns a pre-executed and persisted BrickWall. """ options = self.config["copysegmentation"] # We'll pad from previously-existing pyramid data until # we have full storage blocks, e.g. (64,64,64), # but not necessarily full bricks, e.g. 
(64,64,6400) output_writing_grid = Grid(output_service.preferred_message_shape) storage_block_width = output_service.block_width output_padding_grid = Grid( (storage_block_width, storage_block_width, storage_block_width), output_writing_grid.offset) output_accessor_func = partial(output_service.get_subvolume, scale=scale) with Timer( f"Slab {slab_index}: Scale {scale}: Shuffling bricks into alignment", logger): # Consolidate bricks to full-size, aligned blocks (shuffles data) realigned_wall = input_wall.realign_to_new_grid( output_writing_grid, output_accessor_func) del input_wall realigned_wall.persist_and_execute() input_mask_labels = self.input_mask_labels output_mask_labels = self.output_mask_labels # If no masks are involved, we merely need to pad the existing data on the edges. # (No need to fetch the entire output.) # Similarly, if scale > 0, then the masks were already applied and the input/output data was # already combined, we can simply write the (padded) downsampled data. if scale == 0 and (input_mask_labels or output_mask_labels ) and not options["skip-masking-step"]: # If masks are involved, we must fetch the ALL the output # (unless skip-masking-step was given), # and select data from input or output according to the masks. output_service = self.output_service translation_offset_zyx = self.translation_offset_zyx def combine_with_output(input_brick): output_box = input_brick.physical_box + translation_offset_zyx output_vol = output_service.get_subvolume(output_box, scale=0) output_vol = np.asarray(output_vol, order='C') mask = None if input_mask_labels: mask = mask_for_labels(input_brick.volume, input_mask_labels) if output_mask_labels: output_mask = mask_for_labels(output_vol, output_mask_labels) if mask is None: mask = output_mask else: mask[:] &= output_mask # Start with the complete output, then # change voxels that fall within both masks. output_vol[mask] = input_brick.volume[mask] input_brick.compress() return output_vol combined_wall = realigned_wall.map_brick_volumes( combine_with_output) combined_wall.persist_and_execute( f"Slab {slab_index}: Scale {scale}: Combining masked bricks", logger) realigned_wall = combined_wall padded_wall = realigned_wall.fill_missing(output_accessor_func, output_padding_grid) del realigned_wall padded_wall.persist_and_execute( f"Slab {slab_index}: Scale {scale}: Padding", logger) return padded_wall def _write_bricks(self, slab_index, brick_wall, scale, output_service): """ Writes partition to specified dvid. 
""" block_width = output_service.block_width EMPTY_VOXEL = 0 dont_overwrite_identical_blocks = self.config["copysegmentation"][ "dont-overwrite-identical-blocks"] write_empty_blocks = self.config["copysegmentation"][ "write-empty-blocks"] def write_brick(brick): logger = logging.getLogger(__name__) assert (brick.physical_box % block_width == 0).all(), \ f"This function assumes each brick's physical data is already block-aligned: {brick}" if dont_overwrite_identical_blocks: try: existing_stored_brick = output_service.get_subvolume( brick.physical_box, scale) except: logger.error( f"Error reading brick: {brick.physical_box.tolist()}, scale={scale}" ) raise x_size = brick.volume.shape[2] # Find all non-zero blocks (and record by block index) block_coords = [] for block_index, block_x in enumerate(range( 0, x_size, block_width)): new_block = brick.volume[:, :, block_x:block_x + block_width] # By default, write this block if it is non-empty write_block = write_empty_blocks or (new_block != EMPTY_VOXEL).any() # If dont-overwrite-identical-blocks is enabled, # write the block if it DIFFERS from the block that was already stored in DVID. # (Regardless of whether or not either block is empty.) if dont_overwrite_identical_blocks: old_block = existing_stored_brick[:, :, block_x:block_x + block_width] difference_map = (new_block != old_block) write_block = difference_map.any() if write_block: block_coord_zyx = brick.physical_box[0] + [ 0, 0, block_x ] block_coord_xyz = block_coord_zyx[::-1].tolist() changed_voxel_list_new = np.unique( new_block[difference_map]).tolist() changed_voxel_list_old = np.unique( old_block[difference_map]).tolist() msg = ( f"Slab {slab_index}: Scale {scale}: Overwriting block: " '{ ' f'"block-coord-xyz": {block_coord_xyz}, ' f'"difference-voxel-count": {difference_map.sum()}, ' f'"new-ids": {changed_voxel_list_new}, ' f'"old-ids": {changed_voxel_list_old} ' ' }') logger.info(msg) if write_block: block_coords.append( (0, 0, block_index )) # (Don't care about Z,Y indexes, just X-index) # Find *runs* of non-zero blocks block_coords = np.asarray(block_coords, dtype=np.int32) block_runs = runlength_encode_to_ranges( block_coords, True) # returns [[Z,Y,X1,X2], [Z,Y,X1,X2], ...] # Convert stop indexes from inclusive to exclusive block_runs[:, -1] += 1 # Discard Z,Y indexes and convert from indexes to pixels ranges = block_width * block_runs[:, 2:4] # iterate through contiguous blocks and write to DVID for (data_x_start, data_x_end) in ranges: datacrop = brick.volume[:, :, data_x_start:data_x_end].copy() data_offset_zyx = brick.physical_box[0] + (0, 0, data_x_start) with Timer() as _put_timer: try: output_service.write_subvolume(datacrop, data_offset_zyx, scale) except: logger.error( f"Error writing brick at {brick.physical_box.tolist()}, scale={scale}, offset={data_offset_zyx}" ) raise # Note: This timing data doesn't reflect ideal throughput, since throttle # and/or the resource manager muddy the numbers a bit... #megavoxels_per_second = datacrop.size / 1e6 / put_timer.seconds #logger.info(f"Put block {data_offset_zyx} in {put_timer.seconds:.3f} seconds ({megavoxels_per_second:.1f} Megavoxels/second)") brick.compress() msg = f"Slab {slab_index}: Scale {scale}: Writing bricks" if isinstance(output_service.base_service, DvidVolumeService): instance_name = output_service.base_service.instance_name msg += f" to {instance_name}" with Timer(msg, logger): brick_wall.bricks.map(write_brick).compute()
class SVDecimate(Workflow): """ Download pre-existing supervoxel meshes from a dvid tarsupervoxels instance and decimate them. This workflow can also be used to convert the mesh files from one format to another, or rescale the vertex coordinates. """ TarsupervoxelsInputSchema = \ { "description": "Parameters specify a DVID tarsupervoxels instance", "type": "object", "default": {}, "required": ["dvid"], "additionalProperties": False, "properties": { "dvid": { "default": {}, "type": "object", "required": ["server", "uuid", "tarsupervoxels-instance"], "additionalProperties": False, "properties": { "server": { "description": "location of DVID server to READ.", "type": "string", }, "uuid": { "description": "version node for READING segmentation", "type": "string" }, "tarsupervoxels-instance": { "description": "Name of a tarsupervoxels instance", "type": "string" } } } } } GenericDvidInstanceSchema = \ { "description": "Parameters to specify a generic dvid instance (server/uuid/instance).\n" "Omitted values will be copied from the input, or given default values.", "type": "object", "required": ["server", "uuid"], # Must not have default. (Appears below in a 'oneOf' context.) # "default": {}, "additionalProperties": False, "properties": { "server": { "description": "location of DVID server to READ.", "type": "string", "default": "" }, "uuid": { "description": "version node from dvid", "type": "string", "default": "" }, "instance": { "description": "Name of the instance to create", "type": "string" }, "sync-to": { "description": "When creating a tarsupervoxels instance, it should be sync'd to a labelmap instance.\n" "Give the instance name here.", "type": "string", "default": "" }, "create-if-necessary": { "description": "Whether or not to create the instance if it doesn't already exist.\n" "If you expect the instance to exist on the server already, leave this\n" "set to False to avoid confusion in the case of typos, UUID mismatches, etc.\n", "type": "boolean", "default": False }, } } TarsupervoxelsOutputSchema = \ { "additionalProperties": False, "properties": { "tarsupervoxels": GenericDvidInstanceSchema } } KeyvalueOutputSchema = \ { "additionalProperties": False, "properties": { "keyvalue": GenericDvidInstanceSchema } } DirectoryOutputSchema = \ { "additionalProperties": False, "properties": { "directory": { "description": "Directory to write supervoxel meshes into.", "type": "string", # Must not have default. (Appears below in a 'oneOf' context.) # "default": "" } } } SVDecimateOptionSchema = \ { "type": "object", "description": "Settings specific to the SVDecimate workflow", "default": {}, "additionalProperties": False, "properties": { "bodies": BodyListSchema, "decimation": { "description": "Mesh decimation aims to reduce the number of \n" "mesh vertices in the mesh to a fraction of the original mesh. 
\n" "To disable decimation, use 1.0.\n", "type": "number", "minimum": 0.0000001, "maximum": 1.0, # 1.0 == disable "default": 0.1 }, "decimation-library": { "type": "string", "enum": ["openmesh", "fq-in-memory", "fq-via-disk"], "default": "openmesh" }, "max-sv-vertices": { "description": "Ensure that meshes have no more vertices than specified by this setting.\n" "That is, decrease the decimation fraction if necessary bring the mesh vertex count below this number.\n", "type": "number", "default": 1e9 # very large }, "max-body-vertices": { "description": "If necessary, reduce the decimation fraction to ensure that the total vertex\n" "count across all supervoxels in each body does not exceed this number.\n", "type": "number", "default": 1e9, # effectively unlimited }, "rescale": { "description": "How to multiply the mesh vertex coordinates before saving the mesh.\n" "Typically very important when converting to ngmesh format from some other format.\n", "type": "array", "items": {"type": "number"}, "minItems": 3, "maxItems": 3, "default": flow_style([1.0, 1.0, 1.0]) }, "processes-per-body": { "description": "Parallelism to use when processing supervoxel meshes for each body.\n" "Bodies are processed in a single dask task, but further parallelism may be desirable within that task.\n", "type": "integer", "default": 1, "minValue": 1 }, "format": { "description": "Format in which to save the meshes", "type": "string", "enum": ["obj", # Wavefront OBJ (.obj) "drc", # Draco (compressed) (.drc) "ngmesh"], # "neuroglancer mesh" format -- a custom binary format. Note: Data is presumed to be 8nm resolution "default": "obj" }, } } Schema = copy.deepcopy(Workflow.schema()) Schema["properties"].update({ "input": TarsupervoxelsInputSchema, "output": { "oneOf": [ DirectoryOutputSchema, TarsupervoxelsOutputSchema, KeyvalueOutputSchema, ], "default": {"directory": "sv-meshes"} }, "svdecimate": SVDecimateOptionSchema }) @classmethod def schema(cls): return SVDecimate.Schema def execute(self): self._sanitize_config() self._prepare_output() input_config = self.config["input"]["dvid"] output_config = self.config["output"] options = self.config["svdecimate"] resource_config = self.config["resource-manager"] resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"]) server = input_config["server"] uuid = input_config["uuid"] tsv_instance = input_config["tarsupervoxels-instance"] bodies = load_body_list(options["bodies"], False) # Determine segmentation instance info = fetch_instance_info(server, uuid, tsv_instance) input_format = info["Extended"]["Extension"] output_format = options["format"] if np.array(options["rescale"] == 1.0).all() and output_format == "ngmesh" and input_format != "ngmesh": logger.warning("*** You are converting to ngmesh format, but you have not specified a rescale parameter! 
***") decimation_lib = options["decimation-library"] max_sv_vertices = options["max-sv-vertices"] max_body_vertices = options["max-body-vertices"] num_procs = options["processes-per-body"] def process_body(body_id): with resource_mgr_client.access_context( input_config["server"], True, 1, 0 ): tar_bytes = fetch_tarfile(server, uuid, tsv_instance, body_id) sv_meshes = Mesh.from_tarfile(tar_bytes, concatenate=False) sv_meshes = {int(os.path.splitext(name)[0]): m for name, m in sv_meshes.items()} total_body_vertices = sum([len(m.vertices_zyx) for m in sv_meshes.values()]) decimation = min(1.0, max_body_vertices / total_body_vertices) try: _process_sv = partial(process_sv, decimation, decimation_lib, max_sv_vertices, output_format) if num_procs <= 1: output_table = [*starmap(_process_sv, sv_meshes.items())] else: output_table = compute_parallel(_process_sv, sv_meshes.items(), starmap=True, processes=num_procs, ordered=False, show_progress=False) cols = ['sv', 'orig_vertices', 'final_vertices', 'final_decimation', 'effective_decimation', 'mesh_bytes'] output_df = pd.DataFrame(output_table, columns=cols) output_df['body'] = body_id output_df['error'] = "" write_sv_meshes(output_df, output_config, output_format, resource_mgr_client) except Exception as ex: svs = [*sv_meshes.keys()] orig_vertices = [len(m.vertices_zyx) for m in sv_meshes.values()] output_df = pd.DataFrame({'sv': svs, 'orig_vertices': orig_vertices}) output_df['final_vertices'] = -1 output_df['final_decimation'] = -1 output_df['effective_decimation'] = -1 output_df['mesh_bytes'] = -1 output_df['body'] = body_id output_df['error'] = str(ex) return output_df.drop(columns=['mesh_bytes']) futures = self.client.map(process_body, bodies) # Support synchronous testing with a fake 'as_completed' object if hasattr(self.client, 'DEBUG'): ac = as_completed_synchronous(futures, with_results=True) else: ac = distributed.as_completed(futures, with_results=True) try: stats = [] for f, r in tqdm_proxy(ac, total=len(futures)): stats.append(r) if (r['error'] != "").any(): body = r['body'].iloc[0] logger.warning(f"Body {body} failed!") finally: stats_df = pd.concat(stats) stats_df.to_csv('mesh-stats.csv', index=False, header=True) with open('mesh-stats.pkl', 'wb') as f: pickle.dump(stats_df, f) def _sanitize_config(self): # Convert input/output CSV to absolute paths options = self.config["svdecimate"] assert options["bodies"], "No input body list provided" if isinstance(options["bodies"], str) and options["bodies"].endswith(".csv"): assert os.path.exists(options["bodies"]), \ f'Input file does not exist: {options["bodies"]}' is_distributed = self.config["cluster-type"] not in ("syncrhonous", "processes") needs_multiprocessing = (options["processes-per-body"] > 1) workers_are_daemon = dask.config.get('distributed.worker.daemon', True) if is_distributed and needs_multiprocessing and workers_are_daemon: msg = ("This workflow uses multiprocessing, so you must configure your dask workers NOT to be daemons.\n" "In your dask-config, set distributed.worker.daemon: false") raise RuntimeError(msg) def _prepare_output(self): """ If necessary, create the output directory or DVID instance so that meshes can be written to it. """ input_cfg = self.config["input"] output_cfg = self.config["output"] options = self.config["svdecimate"] ## directory output if 'directory' in output_cfg: # Convert to absolute so we can chdir with impunity later. 
output_cfg['directory'] = os.path.abspath(output_cfg['directory']) os.makedirs(output_cfg['directory'], exist_ok=True) return ## ## DVID output (either keyvalue or tarsupervoxels) ## (instance_type,) = output_cfg.keys() server = output_cfg[instance_type]['server'] uuid = output_cfg[instance_type]['uuid'] instance = output_cfg[instance_type]['instance'] # If the output server or uuid is left blank, # we assume it should be auto-filled from the input settings. if server == "" or uuid == "": assert "dvid" in input_cfg if server == "": output_cfg[instance_type]['server'] = input_cfg["dvid"]["server"] if uuid == "": output_cfg[instance_type]['uuid'] = input_cfg["dvid"]["uuid"] # Resolve in case a branch was given instead of a specific uuid server = output_cfg[instance_type]['server'] uuid = output_cfg[instance_type]['uuid'] uuid = resolve_ref(server, uuid) if is_locked(server, uuid): info = fetch_server_info(server) if "Mode" in info and info["Mode"] == "allow writes on committed nodes": logger.warning(f"Output is a locked node ({uuid}), but server is in full-write mode. Proceeding.") elif os.environ.get("DVID_ADMIN_TOKEN", ""): logger.warning(f"Output is a locked node ({uuid}), but you defined DVID_ADMIN_TOKEN. Proceeding.") else: raise RuntimeError(f"Can't write to node {uuid} because it is locked.") if instance_type == 'tarsupervoxels' and not self.input_is_labelmap_supervoxels(): msg = ("You shouldn't write to a tarsupervoxels instance unless " "you're reading supervoxels from a labelmap input.\n" "Use a labelmap input source, and set supervoxels: true") raise RuntimeError(msg) existing_instances = fetch_repo_instances(server, uuid) if instance in existing_instances: # Instance exists -- nothing to do. return if not output_cfg[instance_type]['create-if-necessary']: msg = (f"Output instance '{instance}' does not exist, " "and your config did not specify create-if-necessary") raise RuntimeError(msg) assert instance_type in ('tarsupervoxels', 'keyvalue') ## keyvalue output if instance_type == "keyvalue": create_instance(server, uuid, instance, "keyvalue", tags=["type=meshes"]) return ## tarsupervoxels output sync_instance = output_cfg["tarsupervoxels"]["sync-to"] if not sync_instance: # Auto-fill a default 'sync-to' instance using the input segmentation, if possible. info = fetch_instance_info(*[input_cfg["dvid"][k] for k in ("server", "uuid", "tarsupervoxels-instance")]) syncs = info['Base']['Syncs'] if syncs: sync_instance = syncs[0] if not sync_instance: msg = ("Can't create a tarsupervoxels instance unless " "you specify a 'sync-to' labelmap instance name.") raise RuntimeError(msg) if sync_instance not in existing_instances: msg = (f"Can't sync to labelmap instance '{sync_instance}': " "it doesn't exist on the output server.") raise RuntimeError(msg) create_tarsupervoxel_instance(server, uuid, instance, sync_instance, options["format"])
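# Illustrative sketch (not called anywhere): the per-body decimation cap that
# SVDecimate's process_body() computes above. If the combined vertex count of a
# body's supervoxel meshes exceeds max-body-vertices, the decimation fraction
# passed to process_sv() is reduced so the body's total stays under the limit.
# The vertex counts below are made-up example numbers.
def _example_body_decimation_cap():
    max_body_vertices = 1e9
    sv_vertex_counts = [250_000_000, 1_250_000_000]   # hypothetical per-supervoxel mesh sizes
    total_body_vertices = sum(sv_vertex_counts)
    decimation = min(1.0, max_body_vertices / total_body_vertices)
    return decimation   # ~0.67 here: each supervoxel mesh keeps ~67% of its vertices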
ZarrCreationSettingsSchema = \ { "description": "Settings to use when creating a Zarr volume.\n", "type": "object", "default": {}, "additionalProperties": False, "properties": { "shape": { "description": "The shape of the volume.\n" "If not provided, it is automatically set from the bounding-box upper coordinate and global-offset (if any).\n", "type": "array", "items": { "type": "integer" }, "minItems": 3, "maxItems": 3, "default": flow_style([-1,-1,-1]) }, "dtype": { "description": "Datatype of the volume. Must be specified when creating a new volume.", "type": "string", "enum": ["auto", "uint8", "uint16", "uint32", "uint64", "int8", "int16", "int32", "int64", "float32", "float64"], "default": "auto" }, "chunk-shape": { "description": "The shape of the chunks on disk.", "type": "array", "items": { "type": "integer" }, "minItems": 3, "maxItems": 3, "default": flow_style([128,128,128]) },
class DecimateMeshes(Workflow): """ Download pre-existing meshes from a dvid tarsupervoxels instance, and decimate them. Basically a clusterized wrapper around neuclease.bin.decimate_existing_mesh TODO: Save mesh stats to a csv file. """ DvidTarsupervoxelsInstanceSchema = \ { "description": "Parameters specify a DVID instance", "type": "object", "default": {}, "required": ["dvid"], "additionalProperties": False, "properties": { "dvid": { "default": {}, "type": "object", "required": ["server", "uuid", "tarsupervoxels-instance"], "additionalProperties": False, "properties": { "server": { "description": "location of DVID server to READ.", "type": "string", }, "uuid": { "description": "version node for READING segmentation", "type": "string" }, "tarsupervoxels-instance": { "description": "Name of a tarsupervoxels instance", "type": "string" } } } } } DecimateMeshesOptionsSchema = \ { "type": "object", "description": "Settings specific to the DecimateMeshes workflow", "default": {}, "additionalProperties": False, "properties": { "bodies": BodyListSchema, "skip-existing": { "description": "If true, skip any meshes that are already present in the output directory.", "type": "boolean", "default": False, }, "format": { "description": "Format to save the meshes in", "type": "string", "enum": ["obj", # Wavefront OBJ (.obj) "drc", # Draco (compressed) (.drc) "ngmesh"], # "neuroglancer mesh" format -- a custom binary format. Note: Data is presumed to be 8nm resolution "default": "obj" }, "decimation": { "description": "Mesh decimation aims to reduce the number of \n" "mesh vertices in the mesh to a fraction of the original mesh. \n" "To disable decimation, use 1.0.\n", "type": "number", "minimum": 0.0000001, "maximum": 1.0, # 1.0 == disable "default": 0.1 }, "max-vertices": { "description": "Ensure that meshes have no more vertices than specified by this setting.\n" "That is, decrease the decimation fraction if necessary bring the mesh vertex count below this number.\n", "type": "number", "default": 1e9 # very large }, "rescale": { "description": "How to multiply the mesh vertex coordinates before saving the mesh.\n" "Typically very important when converting to ngmesh format from some other format.\n", "type": "array", "items": {"type": "number"}, "minItems": 3, "maxItems": 3, "default": flow_style([1.0, 1.0, 1.0]) }, "output-directory": { "description": "Location to write decimated meshes to", "type": "string", "default": "meshes" } } } Schema = copy.deepcopy(Workflow.schema()) Schema["properties"].update({ "input": DvidTarsupervoxelsInstanceSchema, "decimatemeshes": DecimateMeshesOptionsSchema }) @classmethod def schema(cls): return DecimateMeshes.Schema def _sanitize_config(self): """ - Normalize/overwrite certain config values - Check for config mistakes - Simple sanity checks """ # Resolve uuid if necessary (e.g. 
'master' -> abc123) dvid_cfg = self.config["input"]["dvid"] dvid_cfg["uuid"] = resolve_ref(dvid_cfg["server"], dvid_cfg["uuid"]) # Convert input/output CSV to absolute paths options = self.config["decimatemeshes"] assert options["bodies"], \ "No input body list provided" if isinstance(options["bodies"], str) and options["bodies"].endswith(".csv"): assert os.path.exists(options["bodies"]), \ f'Input file does not exist: {options["bodies"]}' def execute(self): self._sanitize_config() input_config = self.config["input"]["dvid"] options = self.config["decimatemeshes"] resource_config = self.config["resource-manager"] skip_existing = options['skip-existing'] output_dir = options["output-directory"] os.makedirs(output_dir, exist_ok=True) resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"]) server = input_config["server"] uuid = input_config["uuid"] tsv_instance = input_config["tarsupervoxels-instance"] # Determine segmentation instance info = fetch_instance_info(server, uuid, tsv_instance) seg_instance = info["Base"]["Syncs"][0] input_format = info["Extended"]["Extension"] if (np.array(options["rescale"]) == 1.0).all() and options["format"] == "ngmesh" and input_format != "ngmesh": logger.warning( "*** You are converting to ngmesh format, but you have not specified a rescale parameter! ***" ) def process_body(body_id): output_path = f'{output_dir}/{body_id}.{options["format"]}' if skip_existing and os.path.exists(output_path): return (body_id, 0, 0.0, 0, 'skipped', 0) with resource_mgr_client.access_context(input_config["server"], True, 1, 0): try: mutid = fetch_mutation_id(server, uuid, seg_instance, body_id) except HTTPError: # FIXME: Better to log the exception strings to a file return (body_id, 0, 0.0, 0, 'error-mutid', 0) try: tar_bytes = fetch_tarfile(server, uuid, tsv_instance, body_id) except HTTPError: # FIXME: Better to log the exception strings to a file return (body_id, 0, 0.0, 0, 'error-fetch', mutid) try: vertex_count, fraction, orig_vertices = \ decimate_existing_mesh( server, uuid, tsv_instance, body_id, options["decimation"], options["max-vertices"], options["rescale"], options["format"], output_path, tar_bytes=tar_bytes ) except: return (body_id, 0, 0.0, 0, 'error-generate', mutid) return (body_id, vertex_count, fraction, orig_vertices, 'success', mutid) bodies = self._load_body_list(options["bodies"], server, uuid, seg_instance) # Choose more partitions than cores, so that early finishers have the opportunity to steal work. bodies_bag = dask.bag.from_sequence(bodies, npartitions=self.total_cores() * 10) with Timer(f"Decimating {len(bodies)} meshes", logger): stats = bodies_bag.map(process_body).compute() stats_df = pd.DataFrame(stats, columns=[ 'body', 'vertices', 'decimation', 'orig_vertices', 'result', 'mutid' ]) stats_df['uuid'] = uuid stats_df.to_csv('mesh-stats.csv', index=False, header=True) np.save('mesh-stats.npy', stats_df.to_records(index=False)) failed_df = stats_df.query('result != "success"') if len(failed_df) > 0: logger.warning( f"{len(failed_df)} meshes could not be generated. 
See mesh-stats.csv" ) logger.warning(f"Results:\n{stats_df['result'].value_counts()}") def _load_body_list(self, cfg_bodies, server, uuid, seg_instance): options = self.config["decimatemeshes"] if isinstance(cfg_bodies, str) and not cfg_bodies.endswith('.csv'): kafka_timestamp_string = cfg_bodies return self._determine_changed_labelmap_bodies( kafka_timestamp_string, server, uuid, seg_instance) else: return load_body_list(cfg_bodies, False) def _determine_changed_labelmap_bodies(self, kafka_timestamp_string, server, uuid, seg_instance): """ Read the entire labelmap kafka log, and determine which bodies have changed since the given timestamp (a string). Example timestamps: - "2018-11-22" - "2018-11-22 17:34:00" Returns: list of body IDs """ svc_cfg = { "dvid": { "server": server, "uuid": uuid, "segmentation-name": seg_instance } } svc = DvidVolumeService(svc_cfg) subset_bodies = svc.determine_changed_labelmap_bodies( kafka_timestamp_string) if not subset_bodies: raise RuntimeError( "Based on your current settings, no meshes will be generated at all.\n" f"No bodies have changed since your specified timestamp {kafka_timestamp_string}" ) return subset_bodies
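# Illustrative sketch (not part of the workflow): inspecting the statistics file
# written by DecimateMeshes.execute() above. Assumes 'mesh-stats.csv' exists in
# the working directory after a run.
def _example_inspect_mesh_stats():
    import pandas as pd
    stats_df = pd.read_csv('mesh-stats.csv')
    failed_df = stats_df.query('result != "success"')
    return stats_df['result'].value_counts(), failed_df['body'].tolist()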
import logging import numpy as np from .. import VolumeServiceReader from confiddler import flow_style logger = logging.getLogger(__name__) NewAxisOrderSchema = \ { "description": "How to present the volume, in terms of the source volume axes.", "type": "array", "minItems": 3, "maxItems": 3, "items": { "type": "string", "enum": ["x", "y", "z", "1-x", "1-y", "1-z"] }, "default": flow_style(["x", "y", "z"]) # no transpose } class TransposedVolumeService(VolumeServiceReader): """ Wraps an existing VolumeServiceReader and presents a transposed or rotated view of it. (Technically, this is an example of the so-called "decorator" GoF pattern.) """ ## These constants are expressed using X,Y,Z conventions! # Rotations in the XY-plane, about the Z axis
from confiddler import flow_style BoundingBoxSchema = \ { "description": "The bounding box [[x0,y0,z0],[x1,y1,z1]], \n" "where [x1,y1,z1] == maxcoord+1 (i.e. Python conventions)", "type": "array", "minItems": 2, "maxItems": 2, "items": { "type": "array", "items": { "type": "integer" }, "minItems": 3, "maxItems": 3 }, "default": flow_style([[-1,-1,-1], [-1,-1,-1]]) } GeometrySchema = \ { "description": "Describes a volume's geometric and access pattern properties.\n", "type": "object", "default": {}, "properties": { "bounding-box": BoundingBoxSchema, "message-block-shape": { "description": "The preferred access pattern block shape.\n" "A value of -1 for any axis means 'auto'", "type": "array", "items": { "type": "integer" },