def _load_and_overwrite_dask_config(execution_dir, cluster_type):
    """
    Load the dask config, inject defaults for (selected) missing entries,
    and overwrite the config file in-place.

    The config is read from ``<execution_dir>/dask-config.yaml``.  If that
    file is missing or empty, a config consisting purely of schema defaults
    is generated.  Afterwards the file is (re)written, the ``DASK_CONFIG``
    environment variable is pointed at it, and dask's configuration is
    refreshed.

    Args:
        execution_dir:
            Directory in which ``dask-config.yaml`` is read and written.
        cluster_type:
            The only key that is kept in the config's ``jobqueue`` section;
            all other ``jobqueue`` entries are dropped.
    """
    dask_config_path = os.path.abspath(f'{execution_dir}/dask-config.yaml')

    if os.path.exists(dask_config_path):
        # Check for completely empty dask config file
        from ruamel.yaml import YAML
        yaml = YAML()

        # Use a context manager so the file handle is closed deterministically
        # (the handle was previously left for the garbage collector).
        with open(dask_config_path, 'r') as f:
            config = yaml.load(f)

        if not config:
            dask_config = {}
            validate(dask_config, DaskConfigSchema, inject_defaults=True)
        else:
            dask_config = load_config(dask_config_path, DaskConfigSchema)
    else:
        dask_config = {}
        validate(dask_config, DaskConfigSchema, inject_defaults=True)

    # Don't pollute the config file with extra jobqueue parameters we aren't using
    if "jobqueue" in dask_config:
        # Iterate over a copy of the keys, since we delete while looping.
        for key in list(dask_config["jobqueue"].keys()):
            if key != cluster_type:
                del dask_config["jobqueue"][key]

        if not dask_config["jobqueue"]:
            del dask_config["jobqueue"]

    dump_config(dask_config, dask_config_path)

    # This environment variable is recognized by dask itself
    os.environ["DASK_CONFIG"] = dask_config_path
    dask.config.paths.append(dask_config_path)
    dask.config.refresh()
def test_emit_defaults_with_comments():
    """
    emit_defaults(..., include_yaml_comments=True) should return the plain
    default values, and the schema descriptions should show up in the
    dumped YAML text.
    """
    string_property = {
        'description': 'MYSTRING_DESCRIPTION_TEXT',
        'type': 'string',
        'default': 'DEFAULT',
    }
    number_property = {
        'description': 'MYNUMBER_DESCRIPTION_TEXT',
        'type': 'number',
        'default': 42,
    }
    schema = {
        'type': 'object',
        'properties': {
            'mystring': string_property,
            'mynumber': number_property,
        },
        'default': {},
    }

    defaults = emit_defaults(schema, include_yaml_comments=True)
    assert defaults == {'mystring': 'DEFAULT', 'mynumber': 42}
    validate(defaults, schema)

    # Dump to an in-memory buffer and look for the description text.
    f = StringIO()
    yaml.dump(defaults, f)
    assert 'MYSTRING_DESCRIPTION_TEXT' in f.getvalue()
    assert 'MYNUMBER_DESCRIPTION_TEXT' in f.getvalue()
def __init__(self, resource_manager_config):
    """
    Validate the resource manager config (injecting defaults) and record
    whether the resource server is to be launched on the driver.

    Args:
        resource_manager_config:
            Config section validated against ResourceManagerSchema.
            If its 'server' is "driver", the 'server' entry is overwritten
            in-place with the driver's IP address.

    Raises:
        RuntimeError:
            If a 'config' section was explicitly given although the server
            is not "driver", or if a server was given with port 0.
    """
    validate(resource_manager_config, ResourceManagerSchema, inject_defaults=True)
    self.resource_manager_config = resource_manager_config
    self.resource_server_process = None

    cfg = resource_manager_config
    server, port = cfg["server"], cfg["port"]
    self.launch_on_driver = (server == "driver")

    if self.launch_on_driver:
        # Overwrite workflow config data so workers see our IP address.
        cfg["server"] = driver_ip_addr
    elif not hasattr(cfg["config"], 'from_default'):
        # confiddler adds an attribute 'from_default' to indicate that the section
        # was missing from the user's config and provided from default values.
        # Its absence means the user wrote the section explicitly.
        raise RuntimeError(
            "The resource manager config should only be specified when resource manager 'server' is set to 'driver'.\n"
            "Remove the resource-manager 'config' section from your config file.\n"
            "(If the resource manager server is already running on a different machine, configure it there.)\n"
        )

    # Note: 'server' still holds the value from the user's config here,
    # even if cfg["server"] was overwritten above.
    if server and port == 0:
        raise RuntimeError(f"You specified a resource server ({server}), but no port")
def __init__(self, original_volume_service, labelmap_config):
    """
    Wrap an existing volume service with a label mapping.

    Args:
        original_volume_service:
            The service being wrapped.  (See VolumeService.service_chain)
        labelmap_config:
            Config validated against LabelMapSchema (defaults injected).
            A relative 'file' path is replaced in-place by its absolute path.
    """
    self.original_volume_service = original_volume_service
    validate(labelmap_config, LabelMapSchema, inject_defaults=True)

    # Convert relative path to absolute
    # (gs:// URLs and already-absolute paths are left alone)
    mapping_file = labelmap_config["file"]
    if not mapping_file.startswith(('gs://', "/")):
        labelmap_config["file"] = os.path.abspath(mapping_file)

    self.labelmap_config = labelmap_config

    # These are computed on-demand and memoized for the sake of pickling support.
    # See __getstate__()
    self._mapper = None
    self._mapping_pairs = None
    self._compressed_mapping_pairs = None

    assert np.issubdtype(self.dtype, np.integer)

    apply_when = labelmap_config["apply-when"]
    self.apply_when_reading = apply_when in ("reading", "reading-and-writing")
    self.apply_when_writing = apply_when in ("writing", "reading-and-writing")

    self.missing_value_mode = labelmap_config["missing-value-mode"]
def test_no_adapter(setup_hdf5_service):
    """
    With default settings, 'rescale-level' is None and
    VolumeService.create_from_config() returns a bare Hdf5VolumeService.
    """
    _raw_volume, volume_config, _full_from_h5, _h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    rescale_level = volume_config["adapters"]["rescale-level"]
    assert rescale_level is None

    service = VolumeService.create_from_config(volume_config)
    assert isinstance(service, Hdf5VolumeService), \
        "Should not create a ScaledVolumeService adapter at all if rescale-level is null"
def __init__(self, volume_config, resource_manager_client=None):
    """
    Set up a volume service backed by a BrainMapsVolume client.

    Args:
        volume_config:
            Config validated against BrainMapsVolumeSchema (defaults
            injected).  The geometry entries 'block-width', 'bounding-box'
            and 'message-block-shape' are overwritten in-place with the
            values that were actually chosen.
        resource_manager_client:
            Optional.  If None, a dummy ResourceManagerClient("", 0) is used.
    """
    validate(volume_config, BrainMapsVolumeSchema, inject_defaults=True)

    if resource_manager_client is None:
        # Dummy client
        resource_manager_client = ResourceManagerClient("", 0)

    brainmaps_cfg = volume_config["brainmaps"]
    geometry_cfg = volume_config["geometry"]

    self._brainmaps_client = BrainMapsVolume(
        str(brainmaps_cfg["project"]),
        brainmaps_cfg["dataset"],
        brainmaps_cfg["volume-id"],
        brainmaps_cfg["change-stack-id"],
        dtype=None,
        use_gzip=brainmaps_cfg["use-gzip"])

    # Force client to fetch dtype now, so it isn't fetched after pickling.
    self._brainmaps_client.dtype

    block_width = geometry_cfg["block-width"]
    if block_width == -1:
        # FIXME: I don't actually know what BrainMap's internal block size is...
        block_width = 64

    # Config stores XYZ; internally we use ZYX.
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx, [64, 64, 6400])

    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]
    replace_default_entries(bounding_box_zyx, self._brainmaps_client.bounding_box)

    assert (bounding_box_zyx[0] >= self._brainmaps_client.bounding_box[0]).all() \
       and (bounding_box_zyx[1] <= self._brainmaps_client.bounding_box[1]).all(), \
        f"Specified bounding box ({bounding_box_zyx.tolist()}) extends outside the "\
        f"BrainMaps volume geometry ({self._brainmaps_client.bounding_box.tolist()})"

    available_scales = list(geometry_cfg["available-scales"])
    fetch_blockwise = brainmaps_cfg["fetch-blockwise"]

    # Store members
    self._bounding_box_zyx = bounding_box_zyx
    self._resource_manager_client = resource_manager_client
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._block_width = block_width
    self._available_scales = available_scales
    self._fetch_blockwise = fetch_blockwise

    # Overwrite config entries that we might have modified
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()
def test_available_Scales(setup_hdf5_service):
    """
    The 'available-scales' given in the 'rescale-level' adapter settings
    should be reported by the resulting service.
    """
    _raw_volume, volume_config, _full_from_h5, _h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    rescale_settings = {
        "level": 1,
        "available-scales": [0, 1, 2],
    }
    volume_config["adapters"]["rescale-level"] = rescale_settings

    service = VolumeService.create_from_config(volume_config)
    assert service.available_scales == [0, 1, 2]
def test_emit_defaults_with_flow_style():
    """
    A default wrapped with flow_style() should come out of emit_defaults()
    with its flow-style flag set, and should still validate.
    """
    schema = copy.deepcopy(TEST_SCHEMA)
    myobject_schema = schema['properties']['myobject']
    myobject_schema['default'] = flow_style(myobject_schema['default'])

    defaults = emit_defaults(schema)
    assert defaults['myobject'].fa.flow_style()

    # Make sure defaults still validate
    # (despite being yaml CommentedMap or whatever)
    validate(defaults, schema)
def test_inject_default():
    """
    load_config() should fill in schema defaults for entries missing from
    the loaded data, and mark injected sections with 'from_default'.
    """
    schema = copy.deepcopy(TEST_SCHEMA)

    # Write a partial config into an in-memory file and rewind it.
    stream = StringIO()
    yaml.dump({'mynumber': 10}, stream)
    stream.seek(0)

    cfg = load_config(stream, schema)

    assert cfg['mystring'] == 'DEFAULT'
    inner = cfg['myobject']
    assert inner['inner-string'] == 'INNER_DEFAULT'
    assert inner.from_default == True
    validate(cfg, schema)
def test_sample_labels(setup_hdf5_service):
    """
    sample_labels() on a scale-1 reader should return the same values as
    indexing the block-mean downsampled volume at the same points.
    """
    _raw_volume, volume_config, full_from_h5, _h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    downsampled = downsample(full_from_h5, 2, 'block-mean')

    # Ten random coordinates per axis, then transposed to one row per point.
    per_axis_coords = [np.random.randint(d, size=(10,)) for d in downsampled.shape]
    points = np.transpose(per_axis_coords)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    labels = scaled_reader.sample_labels(points)
    expected = downsampled[tuple(points.transpose())]
    assert (labels == expected).all()
def test_emit_defaults():
    """
    emit_defaults() on TEST_SCHEMA should yield every default value,
    including the nested object's, and the result should validate.
    """
    schema = copy.deepcopy(TEST_SCHEMA)
    expected = {
        'mystring': 'DEFAULT',
        'mynumber': 42,
        'myobject': {'inner-string': 'INNER_DEFAULT'},
    }

    defaults = emit_defaults(schema)
    assert defaults == expected

    # Make sure defaults still validate
    # (despite being yaml CommentedMap or whatever)
    validate(defaults, schema)
def load_and_overwrite_dask_config(cluster_type, dask_config_path=None, overwrite=False):
    """
    Load dask config, inject defaults for (selected) missing entries,
    and optionally overwrite in-place.

    Note: Also re-initializes the distributed logging configuration.

    Args:
        cluster_type:
            The only key that is kept in the config's ``jobqueue`` section;
            all other ``jobqueue`` entries are dropped.
        dask_config_path:
            Path of the dask config file.  If None, the ``DASK_CONFIG``
            environment variable is used when it is set; otherwise
            ``dask-config.yaml`` (relative to the current directory).
        overwrite:
            If True, write the resulting config back to the file.
    """
    if dask_config_path is None and 'DASK_CONFIG' in os.environ:
        dask_config_path = os.environ["DASK_CONFIG"]

    dask_config_path = dask_config_path or 'dask-config.yaml'
    dask_config_path = os.path.abspath(dask_config_path)

    if os.path.exists(dask_config_path):
        # Check for completely empty dask config file
        from ruamel.yaml import YAML
        yaml = YAML()

        # Use a context manager so the file handle is closed deterministically
        # (the handle was previously left for the garbage collector).
        with open(dask_config_path, 'r') as f:
            config = yaml.load(f)

        if not config:
            dask_config = {}
            validate(dask_config, DaskConfigSchema, inject_defaults=True)
        else:
            dask_config = load_config(dask_config_path, DaskConfigSchema)
    else:
        dask_config = {}
        validate(dask_config, DaskConfigSchema, inject_defaults=True)

    # Don't pollute the config file with extra jobqueue parameters we aren't using
    if "jobqueue" in dask_config:
        # Iterate over a copy of the keys, since we delete while looping.
        for key in list(dask_config["jobqueue"].keys()):
            if key != cluster_type:
                del dask_config["jobqueue"][key]

        if not dask_config["jobqueue"]:
            del dask_config["jobqueue"]

    if overwrite:
        dump_config(dask_config, dask_config_path)

    # This environment variable is recognized by dask itself
    os.environ["DASK_CONFIG"] = dask_config_path
    dask.config.paths.append(dask_config_path)
    dask.config.refresh()

    # Must be imported this way due to aliased name 'config' in distributed.__init__
    from distributed.config import initialize_logging
    initialize_logging(dask.config.config)
def test_full_volume_downsample_1(setup_hdf5_service):
    """
    A scale-1 reader should report geometry halved relative to the HDF5
    reader, keep the dtype, and return the block-mean downsampled volume
    as a C-contiguous array.
    """
    _raw_volume, volume_config, full_from_h5, h5_reader = setup_hdf5_service
    validate(volume_config, GrayscaleVolumeSchema, inject_defaults=True)

    # Scale 1
    volume_config["adapters"]["rescale-level"] = 1
    scaled_reader = VolumeService.create_from_config(volume_config)

    # Geometry and dtype
    expected_box = h5_reader.bounding_box_zyx // 2
    assert (scaled_reader.bounding_box_zyx == expected_box).all()

    expected_message_shape = h5_reader.preferred_message_shape // 2
    assert (scaled_reader.preferred_message_shape == expected_message_shape).all()

    assert scaled_reader.block_width == h5_reader.block_width // 2
    assert scaled_reader.dtype == h5_reader.dtype

    # Voxel data
    full_scaled = scaled_reader.get_subvolume(scaled_reader.bounding_box_zyx)
    expected_volume = downsample(full_from_h5, 2, 'block-mean')
    assert (full_scaled == expected_volume).all()
    assert full_scaled.flags.c_contiguous
def test_emit_incomplete_defaults():
    """
    A property without a default should be emitted as the placeholder
    '{{NO_DEFAULT}}', which (expectedly) fails validation.
    """
    schema = copy.deepcopy(TEST_SCHEMA)

    # Delete the default for 'mynumber'
    schema['properties']['mynumber'].pop('default')

    expected = {
        'mystring': 'DEFAULT',
        'mynumber': '{{NO_DEFAULT}}',
        'myobject': {'inner-string': 'INNER_DEFAULT'},
    }
    defaults = emit_defaults(schema)
    assert defaults == expected

    # The '{{NO_DEFAULT}}' setting doesn't validate.
    # That's okay.
    with pytest.raises(ValidationError):
        validate(defaults, schema)
def __init__(self, volume_config):
    """
    Set up a volume service backed by a stack of slice image files.

    Args:
        volume_config:
            Config validated against SliceFilesVolumeSchema (defaults
            injected).  The entries 'slice-path-format', 'bounding-box'
            and 'message-block-shape' are overwritten in-place with the
            values that were actually chosen.

    Raises:
        RuntimeError:
            If a partially specified bounding box, once completed, exceeds
            the box determined from the image stack.
    """
    validate(volume_config, SliceFilesVolumeSchema, inject_defaults=True)

    slice_files_cfg = volume_config["slice-files"]
    geometry_cfg = volume_config["geometry"]

    # Convert path to absolute if necessary (and write back to the config)
    slice_fmt = slice_files_cfg["slice-path-format"]
    assert not slice_fmt.startswith('gs://'), "FIXME: Support gbuckets"
    slice_fmt = os.path.abspath(slice_fmt)

    # Config stores the offset as XY; keep it as YX.
    self.slice_corner_yx = slice_files_cfg["slice-xy-offset"][::-1]

    # dtype stays None unless the stack attributes are inspected below.
    dtype = None
    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]

    # Determine complete bounding box
    needs_stack_attributes = -1 in bounding_box_zyx.flat
    if needs_stack_attributes:
        default_bounding_box_zyx, dtype = determine_stack_attributes(slice_fmt)
        default_bounding_box_zyx[:, 1:] += self.slice_corner_yx
        replace_default_entries(bounding_box_zyx, default_bounding_box_zyx)

        below_start = (bounding_box_zyx[0] < default_bounding_box_zyx[0]).any()
        if below_start or (bounding_box_zyx[1] > default_bounding_box_zyx[1]).any():
            msg = (
                f"The given bounding-box ({bounding_box_zyx[:,::-1].tolist()}) exceeds "
                f"the maximum possible bounding box for this image stack ({default_bounding_box_zyx[:,::-1].tolist()})."
            )
            raise RuntimeError(msg)

    assert (bounding_box_zyx[0,1:] >= self.slice_corner_yx).all(), \
        "The bounding-box can't start below the slice-xy-offset"

    # Determine complete preferred "message shape" - one full output slice.
    output_slice_shape = bounding_box_zyx[1] - bounding_box_zyx[0]
    output_slice_shape[0] = 1

    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx, output_slice_shape)
    assert (preferred_message_shape_zyx == output_slice_shape).all(), \
        f"Preferred message shape for slice files must be a single Z-slice, and a complete XY output plane ({output_slice_shape}), "\
        f"not {preferred_message_shape_zyx}"

    available_scales = geometry_cfg["available-scales"]
    assert available_scales == [0], \
        "Bad config: slice-files reader supports only scale zero."

    # Store members
    self._slice_fmt = slice_fmt
    self._dtype = dtype
    self._dtype_nbytes = np.dtype(dtype).type().nbytes
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._available_scales = available_scales

    # Overwrite config entries that we might have modified
    slice_files_cfg["slice-path-format"] = slice_fmt
    geometry_cfg["bounding-box"] = bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = preferred_message_shape_zyx[::-1].tolist()

    # Forbid unsupported config entries
    assert geometry_cfg["block-width"] == -1, \
        "Slice files have no concept of a native block width. Please leave it set to the default (-1)"
def __init__(self, volume_config):
    """
    Set up a volume service backed by an N5 dataset.

    Args:
        volume_config:
            Config validated against N5VolumeSchema (defaults injected).
            The n5 'path' and 'dataset' entries and the geometry entries
            'block-width', 'bounding-box' and 'message-block-shape' are
            overwritten in-place with the values that were actually chosen.

    Raises:
        RuntimeError:
            If the dataset at scale 0 is a z5py group rather than a volume.
    """
    validate(volume_config, N5VolumeSchema, inject_defaults=True)

    n5_cfg = volume_config["n5"]
    geometry_cfg = volume_config["geometry"]

    # Convert path to absolute if necessary (and write back to the config)
    self._path = os.path.abspath(n5_cfg["path"])
    n5_cfg["path"] = self._path

    # Drop a single leading slash from the dataset name (and write it back)
    self._dataset_name = n5_cfg["dataset"]
    if self._dataset_name.startswith('/'):
        self._dataset_name = self._dataset_name[1:]
    n5_cfg["dataset"] = self._dataset_name

    self._n5_file = None
    self._n5_datasets = {}

    self._ensure_datasets_exist(volume_config)

    if isinstance(self.n5_dataset(0), z5py.group.Group):
        raise RuntimeError("The N5 dataset you specified appears to be a 'group', not a volume.\n"
                           "Please pass the complete dataset name. If your dataset is multi-scale,\n"
                           "pass the name of scale 0 as the dataset name (e.g. 's0').\n")

    chunk_shape = np.array(self.n5_dataset(0).chunks)
    assert len(chunk_shape) == 3

    # Config stores XYZ; internally we use ZYX.
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"])[::-1]

    # Replace -1's in the message-block-shape with the corresponding chunk_shape dimensions.
    replace_default_entries(preferred_message_shape_zyx, chunk_shape)
    missing_shape_dims = (preferred_message_shape_zyx == -1)
    preferred_message_shape_zyx[missing_shape_dims] = chunk_shape[missing_shape_dims]
    assert not (preferred_message_shape_zyx % chunk_shape).any(), \
        f"Expected message-block-shape ({preferred_message_shape_zyx}) "\
        f"to be a multiple of the chunk shape ({chunk_shape})"

    # The notion of 'block-width' doesn't really make sense if the chunks aren't cubes.
    chunks_are_cubes = (chunk_shape[0] == chunk_shape[1] == chunk_shape[2])
    block_width = int(chunk_shape[0]) if chunks_are_cubes else -1

    auto_bb = np.array([(0,0,0), self.n5_dataset(0).shape])

    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:,::-1]
    assert (auto_bb[1] >= bounding_box_zyx[1]).all(), \
        f"Volume config bounding box ({bounding_box_zyx}) exceeds the bounding box of the data ({auto_bb})."

    # Replace -1 bounds with auto
    missing_bounds = (bounding_box_zyx == -1)
    bounding_box_zyx[missing_bounds] = auto_bb[missing_bounds]

    # Store members
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._block_width = block_width
    self._available_scales = geometry_cfg["available-scales"]

    # Overwrite config entries that we might have modified
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:,::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()
def __init__(self, volume_config):
    """
    Set up a volume service backed by a dataset in an HDF5 file.

    The file is opened in mode 'a' if the config says 'writable', else 'r'.
    If the dataset is missing and the service is writable, the dataset is
    created from the dtype, bounding box and block width in the config.

    Args:
        volume_config:
            Config validated against Hdf5VolumeSchema (defaults injected).
            The hdf5 'dtype' and the geometry entries 'block-width',
            'bounding-box' and 'message-block-shape' are overwritten
            in-place with the values that were actually chosen.

    Raises:
        RuntimeError:
            If the file or dataset is missing and the service is not
            writable, or if the dataset must be created but the dtype or
            bounding box is not fully specified.
    """
    validate(volume_config, Hdf5VolumeSchema, inject_defaults=True)

    hdf5_cfg = volume_config["hdf5"]
    geometry_cfg = volume_config["geometry"]

    # HDF5 settings
    path = hdf5_cfg["path"]
    dataset_name = hdf5_cfg["dataset"]
    dtype = hdf5_cfg["dtype"]
    writable = hdf5_cfg["writable"]

    # Geometry (config stores XYZ; internally we use ZYX)
    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    block_width = geometry_cfg["block-width"]
    assert list(geometry_cfg["available-scales"]) == [0], \
        "Hdf5VolumeService supports only scale 0"

    if not writable and not os.path.exists(path):
        raise RuntimeError(
            f"File does not exist: {path}\n"
            "You did not specify 'writable' in the config, so I won't create it.:\n"
        )

    mode = 'a' if writable else 'r'
    self._h5_file = h5py.File(path, mode)

    if dataset_name in self._h5_file:
        self._dataset = self._h5_file[dataset_name]
    else:
        # The dataset has to be created -- check that we're allowed and able to.
        if not writable:
            raise RuntimeError(
                f"Dataset '{dataset_name}' not found in file: {path}\n"
                "You did not specify 'writable' in the config, so I won't create it.\n"
            )

        if dtype == "auto":
            raise RuntimeError(
                f"Can't create dataset '{dataset_name}': No dtype specified in the config."
            )

        if -1 in bounding_box_zyx.flat:
            raise RuntimeError(
                f"Can't create dataset '{dataset_name}': Bounding box is not completely specified in the config."
            )

        if block_width == -1:
            default_chunks = 3 * (DEFAULT_CHUNK_WIDTH,)
            chunks = np.minimum(default_chunks, bounding_box_zyx[1])
            replace_default_entries(chunks, default_chunks)
        else:
            chunks = 3 * (block_width,)

        self._dataset = self._h5_file.create_dataset(
            dataset_name,
            shape=bounding_box_zyx[1],
            dtype=np.dtype(dtype),
            chunks=tuple(chunks))

    ###
    ### bounding_box_zyx
    ###
    replace_default_entries(bounding_box_zyx, [(0, 0, 0), self._dataset.shape])
    assert (bounding_box_zyx[0] >= 0).all()
    assert (bounding_box_zyx[1] <= self._dataset.shape).all(), \
        f"bounding box ({bounding_box_zyx.tolist()}) exceeds the stored hdf5 volume shape ({self._dataset.shape})"

    ###
    ### dtype
    ###
    dtype = self._dataset.dtype

    ###
    ### preferred_message_shape_zyx
    ###
    # Fall back to the full dataset shape if the dataset reports no chunks.
    chunk_shape = self._dataset.chunks or self._dataset.shape
    assert len(self._dataset.shape) == 3, f"Dataset '{dataset_name} isn't 3D"

    if -1 in preferred_message_shape_zyx:
        assert (preferred_message_shape_zyx == -1).all(), \
            "Please specify the entire message shape in your config (or omit it entirely)"

        # Aim for bricks of ~256 MB
        MB = 2**20
        chunk_bytes = np.prod(chunk_shape) * dtype.itemsize
        chunks_per_brick = max(1, 256 * MB // chunk_bytes)
        preferred_message_shape_zyx = np.array(
            (*chunk_shape[:2], chunk_shape[2] * chunks_per_brick))

    if block_width == -1:
        block_width = chunk_shape[0]
    else:
        assert block_width == chunk_shape[0], \
            "block-width does not match file chunk shape"

    ##
    ## Store members
    ##
    self._mode = mode
    self._path = path
    self._dataset_name = dataset_name
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._dtype = self._dataset.dtype

    ##
    ## Overwrite config entries that we might have modified
    ##
    hdf5_cfg["dtype"] = self._dtype.name
    geometry_cfg["block-width"] = chunk_shape[0]
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()
def __init__(self, volume_config):
    """
    Set up a volume service backed by a Zarr dataset.

    Args:
        volume_config:
            Config validated against ZarrVolumeSchema (defaults injected).
            The zarr 'path' and 'dataset' entries and the geometry entries
            'block-width', 'bounding-box' and 'message-block-shape' are
            overwritten in-place with the values that were actually chosen.

    Raises:
        RuntimeError:
            If the dataset at scale 0 is a zarr group rather than a volume.
    """
    validate(volume_config, ZarrVolumeSchema, inject_defaults=True)

    zarr_cfg = volume_config["zarr"]
    geometry_cfg = volume_config["geometry"]

    # Convert path to absolute if necessary (and write back to the config)
    self._path = os.path.abspath(zarr_cfg["path"])
    zarr_cfg["path"] = self._path

    # Drop a single leading slash from the dataset name (and write it back)
    self._dataset_name = zarr_cfg["dataset"]
    if self._dataset_name.startswith('/'):
        self._dataset_name = self._dataset_name[1:]
    zarr_cfg["dataset"] = self._dataset_name

    # The store class is looked up by name on the zarr module.
    self._store_cls = getattr(zarr, zarr_cfg["store-type"])
    self._zarr_file = None
    self._zarr_datasets = {}

    self._ensure_datasets_exist(volume_config)

    if isinstance(self.zarr_dataset(0), zarr.hierarchy.Group):
        raise RuntimeError("The Zarr dataset you specified appears to be a 'group', not a volume.\n"
                           "Please pass the complete dataset name. If your dataset is multi-scale,\n"
                           "pass the name of scale 0 as the dataset name (e.g. 's0').\n")

    chunk_shape = np.array(self.zarr_dataset(0).chunks)
    assert len(chunk_shape) == 3

    # Config stores XYZ; internally we use ZYX.
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"])[::-1]

    # Replace -1's in the message-block-shape with the corresponding chunk_shape dimensions.
    replace_default_entries(preferred_message_shape_zyx, chunk_shape)
    missing_shape_dims = (preferred_message_shape_zyx == -1)
    preferred_message_shape_zyx[missing_shape_dims] = chunk_shape[missing_shape_dims]

    # A misaligned message shape only triggers a warning, not an error.
    if (preferred_message_shape_zyx % chunk_shape).any():
        msg = (f"zarr volume: Expected message-block-shape ({preferred_message_shape_zyx[::-1]}) "
               f"to be a multiple of the chunk shape ({chunk_shape[::-1]})")
        logger.warning(msg)

    # The notion of 'block-width' doesn't really make sense if the chunks aren't cubes,
    # but we'll assume the user has chosen something reasonable and just use the minimum chunk dimension.
    block_width = min(chunk_shape)

    global_offset = np.array(zarr_cfg["global-offset"][::-1])
    auto_bb = np.array([(0,0,0), self.zarr_dataset(0).shape])
    auto_bb += global_offset

    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:,::-1]
    assert (auto_bb[1] >= bounding_box_zyx[1]).all() or zarr_cfg["out-of-bounds-access"] != "forbid", \
        f"Volume config bounding box ({bounding_box_zyx}) exceeds the bounding box of the data ({auto_bb}).\n"\
        f"If you want to enable reading out-of-bounds regions (as empty), add out-of-bounds-access: 'permit-empty' to your config."

    # Replace -1 bounds with auto
    missing_bounds = (bounding_box_zyx == -1)
    bounding_box_zyx[missing_bounds] = auto_bb[missing_bounds]

    # Store members
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._block_width = block_width
    self._available_scales = geometry_cfg["available-scales"]
    self._global_offset = global_offset
    self._out_of_bounds_access = zarr_cfg["out-of-bounds-access"]

    # Overwrite config entries that we might have modified
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:,::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()
def __init__(self, volume_config, resource_manager_client=None):
    """
    Set up a volume service backed by a DVID instance (segmentation or
    grayscale).

    Args:
        volume_config:
            Config validated against DvidGenericVolumeSchema (defaults
            injected).  The geometry entries 'block-width', 'bounding-box'
            and 'message-block-shape' are overwritten in-place with the
            values that were actually chosen.  The 'uuid' entry is NOT
            overwritten with the resolved uuid.
        resource_manager_client:
            Optional.  If None, a dummy ResourceManagerClient("", 0) is used.

    Raises:
        RuntimeError:
            If the instance does not exist in the repo and
            'create-if-necessary' is not set.
        HTTPError:
            Re-raised if fetching the instance info fails for any other
            reason.
    """
    validate(volume_config, DvidGenericVolumeSchema, inject_defaults=True)

    dvid_cfg = volume_config["dvid"]
    geometry_cfg = volume_config["geometry"]

    assert 'apply-labelmap' not in dvid_cfg.keys(), \
        ("The apply-labelmap section should be in the 'adapters' section, (parallel to 'dvid' and 'geometry'), "
         "not nested within the 'dvid' section!")

    ##
    ## server, uuid
    ##

    ## Note:
    ##   self.uuid will be resolved, but volume_config["dvid"]["uuid"]
    ##   will not be overwritten. It will remain unresolved.
    ##
    self._server = dvid_cfg["server"]
    self._uuid = resolve_ref(dvid_cfg["server"], dvid_cfg["uuid"])
    self._throttle = dvid_cfg["accept-throttling"]

    ##
    ## instance, dtype, etc.
    ##

    config_block_width = geometry_cfg["block-width"]

    has_segmentation_name = 'segmentation-name' in dvid_cfg
    assert has_segmentation_name ^ ('grayscale-name' in dvid_cfg), \
        "Config error: Specify either segmentation-name or grayscale-name (not both)"

    if "segmentation-name" in dvid_cfg:
        self._instance_name = dvid_cfg["segmentation-name"]
        self._dtype = np.uint64
    elif "grayscale-name" in dvid_cfg:
        self._instance_name = dvid_cfg["grayscale-name"]
        self._dtype = np.uint8

    self._dtype_nbytes = np.dtype(self._dtype).type().nbytes

    try:
        instance_info = fetch_instance_info(self._server, self._uuid, self._instance_name)
    except HTTPError as ex:
        # Only a 400 response is handled here; anything else propagates.
        if ex.response.status_code != 400:
            raise

        if not dvid_cfg["create-if-necessary"]:
            existing_instances = fetch_repo_instances(self._server, self._uuid)
            if self._instance_name not in existing_instances:
                raise RuntimeError(
                    f"Instance '{self._instance_name}' does not exist in {self._server} / {self._uuid}."
                    "Add 'create-if-necessary: true' to your config if you want it to be created.'"
                )
            raise

        # Instance doesn't exist yet -- we are going to create it.
        if "segmentation-name" in dvid_cfg:
            # get_voxels doesn't really care if it's labelarray or labelmap...
            self._instance_type = 'labelmap'
            self._is_labels = True
        else:
            self._instance_type = 'uint8blk'
            self._is_labels = False

        block_width = config_block_width
    else:
        self._instance_type = instance_info["Base"]["TypeName"]
        self._is_labels = self._instance_type in ('labelblk', 'labelarray', 'labelmap')
        if self._instance_type == "googlevoxels" \
           and instance_info["Extended"]["Scales"][0]["channelType"] == "UINT64":
            self._is_labels = True

        bs_x, bs_y, bs_z = instance_info["Extended"]["BlockSize"]
        assert (bs_x == bs_y == bs_z), "Expected blocks to be cubes."
        block_width = bs_x

    # For these optional settings, fall back to the defaults
    # listed in DvidSegmentationServiceSchema if the config lacks them.
    segmentation_properties = DvidSegmentationServiceSchema["properties"]

    self.disable_indexing = (dvid_cfg["disable-indexing"]
                             if "disable-indexing" in dvid_cfg
                             else segmentation_properties["disable-indexing"]["default"])

    self.enable_downres = (dvid_cfg["enable-downres"]
                           if "enable-downres" in dvid_cfg
                           else segmentation_properties["enable-downres"]["default"])

    self.gzip_level = (dvid_cfg["gzip-level"]
                       if "gzip-level" in dvid_cfg
                       else segmentation_properties["gzip-level"]["default"])

    # Whether or not to read the supervoxels from the labelmap instance instead of agglomerated labels.
    self.supervoxels = ("supervoxels" in dvid_cfg) and (dvid_cfg["supervoxels"])

    ##
    ## default block width
    ##
    assert config_block_width in (-1, block_width), \
        f"DVID volume block-width ({config_block_width}) from config does not match server metadata ({block_width})"
    if block_width == -1:
        # No block-width specified; choose default
        block_width = 64

    ##
    ## bounding-box
    ##
    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]
    try:
        stored_extents = fetch_volume_box(self._server, self.uuid, self._instance_name)
    except HTTPError:
        assert -1 not in bounding_box_zyx.flat[:], \
            f"Instance '{self._instance_name}' does not yet exist on the server, "\
            "so your volume_config must specify explicit values for bounding-box"
    else:
        if stored_extents is not None and stored_extents.any():
            replace_default_entries(bounding_box_zyx, stored_extents)

    ##
    ## message-block-shape
    ##
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx,
                            [block_width, block_width, 100 * block_width])

    ##
    ## available-scales
    ##
    available_scales = list(geometry_cfg["available-scales"])

    ##
    ## resource_manager_client
    ##
    if resource_manager_client is None:
        # Dummy client
        resource_manager_client = ResourceManagerClient("", 0)

    ##
    ## Special setting to override resource manager for sparse coords
    ##
    try:
        use_resource_manager_for_sparse_coords = dvid_cfg["use-resource-manager-for-sparse-coords"]
    except KeyError:
        # Grayscale doesn't have this setting
        use_resource_manager_for_sparse_coords = False

    ##
    ## Store members
    ##
    self._resource_manager_client = resource_manager_client
    self._block_width = block_width
    self._bounding_box_zyx = bounding_box_zyx
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._available_scales = available_scales
    self._use_resource_manager_for_sparse_coords = use_resource_manager_for_sparse_coords
    self.write_empty_blocks = dvid_cfg["write-empty-blocks"]

    ##
    ## Overwrite config entries that we might have modified
    ##
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()

    # TODO: Check the server for available scales and overwrite in the config?
    #volume_config["geometry"]["available-scales"] = [0]

    if dvid_cfg["create-if-necessary"]:
        self._create_instance(volume_config)
def __init__(self, volume_config, resource_manager_client=None):
    """
    Set up a volume service backed by a TensorStore store.

    Args:
        volume_config:
            Config validated against TensorStoreVolumeSchema (defaults
            injected).  The kvstore 'bucket' (if present) is stripped of a
            leading 'precomputed://' and/or 'gs://', and the geometry
            entries 'block-width', 'bounding-box' and 'message-block-shape'
            are overwritten in-place with the values actually chosen.
        resource_manager_client:
            Optional.  If None, a dummy ResourceManagerClient("", 0) is used.
    """
    validate(volume_config, TensorStoreVolumeSchema, inject_defaults=True)

    if resource_manager_client is None:
        # Dummy client
        resource_manager_client = ResourceManagerClient("", 0)

    self.volume_config = volume_config

    try:
        # Strip 'gs://' if the user provided it.
        # (Prefixes are stripped in this order, each at most once.)
        bucket = volume_config['tensorstore']['spec']['kvstore']['bucket']
        for prefix in ('precomputed://', 'gs://'):
            if bucket.startswith(prefix):
                bucket = bucket[len(prefix):]
        volume_config['tensorstore']['spec']['kvstore']['bucket'] = bucket
    except KeyError:
        # No bucket in the spec -- nothing to normalize.
        pass

    geometry_cfg = volume_config["geometry"]

    self._stores = {}
    store = self.store(0)
    spec = store.spec()

    block_width = geometry_cfg["block-width"]
    if block_width == -1:
        block_width = spec.to_json()['scale_metadata']['chunk_size'][0]

    # Config stores XYZ; internally we use ZYX.
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx, [256, 256, 256])

    # Convert box from xyzc -> zyx
    domain_box = np.array([spec.domain.inclusive_min, spec.domain.exclusive_max])
    store_box_zyx = domain_box[:, :3][:, ::-1]

    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]
    replace_default_entries(bounding_box_zyx, store_box_zyx)

    assert (bounding_box_zyx[0] >= store_box_zyx[0]).all() and (bounding_box_zyx[1] <= store_box_zyx[1]).all(), \
        f"Specified bounding box ({bounding_box_zyx[:, ::-1].tolist()}) extends outside the "\
        f"TensorStore volume geometry ({store_box_zyx[:, ::-1].tolist()})"

    # FIXME: Figure out how to configure this automatically.
    available_scales = list(geometry_cfg["available-scales"])

    # Store members
    self._dtype = spec.dtype.numpy_dtype
    self._block_width = block_width
    self._bounding_box_zyx = bounding_box_zyx
    self._resource_manager_client = resource_manager_client
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._available_scales = available_scales
    self._reinitialize_via = volume_config["tensorstore"]["reinitialize-via"]

    # Overwrite config entries that we might have modified
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()
def __init__(self, volume_config, resource_manager_client=None):
    """
    Set up a volume service backed by a Boss channel.

    The API token is read from the BOSS_TOKEN environment variable.

    Args:
        volume_config:
            Config validated against BossVolumeSchema (defaults injected).
            The geometry entries 'block-width', 'bounding-box' and
            'message-block-shape' are overwritten in-place with the values
            that were actually chosen.
        resource_manager_client:
            Optional.  If None, a dummy ResourceManagerClient("", 0) is used.

    Raises:
        RuntimeError:
            If BOSS_TOKEN is not defined, or if the bounding box in the
            config is not completely specified.
    """
    validate(volume_config, BossVolumeSchema, inject_defaults=True)

    if resource_manager_client is None:
        # Dummy client
        resource_manager_client = ResourceManagerClient("", 0)

    try:
        token = os.environ["BOSS_TOKEN"]
    except KeyError:
        raise RuntimeError(
            "You must define the BOSS_TOKEN environment variable to use BossVolumeService"
        )

    boss_cfg = volume_config["boss"]
    geometry_cfg = volume_config["geometry"]

    remote_settings = {
        "protocol": "https",
        "host": boss_cfg["host"],
        "token": token,
    }
    self._boss = BossRemote(remote_settings)

    self._channel = self._boss.get_channel(
        boss_cfg["channel"],
        boss_cfg["collection"],
        boss_cfg["experiment"],
    )

    block_width = geometry_cfg["block-width"]
    if block_width == -1:
        # FIXME: I don't think that the Boss uses a cube for blocks internally...
        #        specifically (x, y, z) dimensions are (512, 512, 16)
        block_width = 16

    # Config stores XYZ; internally we use ZYX.
    preferred_message_shape_zyx = np.array(geometry_cfg["message-block-shape"][::-1])
    replace_default_entries(preferred_message_shape_zyx, [64, 64, 6400])

    bounding_box_zyx = np.array(geometry_cfg["bounding-box"])[:, ::-1]
    if -1 in bounding_box_zyx.flat:
        raise RuntimeError(
            "For BOSS volumes, you must explicity supply the entire bounding box in your config."
        )

    # Not yet implemented: filling in default bounds from the Boss coordinate
    # frame, and checking the bounding box against the Boss volume geometry.
    #replace_default_entries(bounding_box_zyx, self._boss.get_coordinate_frame....)
    # assert (bounding_box_zyx[0] >= self._boss_client.bounding_box[0]).all() \
    #    and (bounding_box_zyx[1] <= self._boss_client.bounding_box[1]).all(), \
    #     f"Specified bounding box ({bounding_box_zyx.tolist()}) extends outside the "\
    #     f"Boss volume geometry ({self._boss_client.bounding_box.tolist()})"

    available_scales = list(geometry_cfg["available-scales"])

    # Store members
    self._bounding_box_zyx = bounding_box_zyx
    self._resource_manager_client = resource_manager_client
    self._preferred_message_shape_zyx = preferred_message_shape_zyx
    self._block_width = block_width
    self._available_scales = available_scales

    # Overwrite config entries that we might have modified
    geometry_cfg["block-width"] = self._block_width
    geometry_cfg["bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
    geometry_cfg["message-block-shape"] = self._preferred_message_shape_zyx[::-1].tolist()