def test_copysegmentation_dvid_to_zarr(setup_dvid_to_zarr):
    template_dir, config, volume, dvid_address, repo_uuid, output_file = setup_dvid_to_zarr

    # Modify the config from above to compute pyramid scales,
    # and choose a bounding box that is aligned with the bricks even at scale 2
    # (just for easier testing).
    box_zyx = [[0, 0, 0], [256, 256, 256]]
    config["input"]["geometry"]["bounding-box"] = box_zyx
    config["copysegmentation"]["pyramid-depth"] = 2

    yaml = YAML()
    yaml.default_flow_style = False
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    execution_dir, _workflow = launch_flow(template_dir, 1)

    box_zyx = np.array(box_zyx)

    scale_0_vol = volume[box_to_slicing(*box_zyx)]
    scale_1_vol = downsample_labels(scale_0_vol, 2, True)
    scale_2_vol = downsample_labels(scale_1_vol, 2, True)

    store = zarr.NestedDirectoryStore(f"{execution_dir}/{output_file}")
    f = zarr.open(store, 'r')
    output_0_vol = f['s0'][box_to_slicing(*(box_zyx // 1))]
    output_1_vol = f['s1'][box_to_slicing(*(box_zyx // 2))]
    output_2_vol = f['s2'][box_to_slicing(*(box_zyx // 4))]

    assert (output_0_vol == scale_0_vol).all(), \
        "Scale 0: Written vol does not match expected"
    assert (output_1_vol == scale_1_vol).all(), \
        "Scale 1: Written vol does not match expected"
    assert (output_2_vol == scale_2_vol).all(), \
        "Scale 2: Written vol does not match expected"
def get_subvolume(self, box_zyx, scale=0):
    box_zyx = np.array(box_zyx)
    orig_box = box_zyx.copy()
    box_zyx -= (self._global_offset // (2**scale))

    clipped_box = box_intersection(box_zyx, [(0,0,0), self.zarr_dataset(scale).shape])
    if (clipped_box == box_zyx).all():
        return self.zarr_dataset(scale)[box_to_slicing(*box_zyx.tolist())]

    # Note that this message shows the true zarr storage bounds,
    # and doesn't show the logical bounds according to global_offset (if any).
    msg = f"Zarr Request is out-of-bounds (XYZ): {orig_box[:, ::-1].tolist()}"
    if self._out_of_bounds_access in ("permit", "permit-empty"):
        logger.warning(msg)
    else:
        msg += "\nAdd 'out-of-bounds-access' to your config to allow such requests"
        raise RuntimeError(msg)

    if (clipped_box[1] - clipped_box[0] <= 0).any():
        # request is completely out-of-bounds; just return zeros
        return np.zeros(box_zyx[1] - box_zyx[0], self.dtype)

    # Request is partially out-of-bounds; read what we can, zero-fill for the rest.
    clipped_vol = self.zarr_dataset(scale)[box_to_slicing(*clipped_box.tolist())]
    result = np.zeros(box_zyx[1] - box_zyx[0], self.dtype)
    localbox = clipped_box - box_zyx[0]
    result[box_to_slicing(*localbox)] = clipped_vol
    return result
def test_write(volume_setup):
    tmpdir = tempfile.mkdtemp()
    config, volume = volume_setup
    global_offset = config["zarr"]["global-offset"][::-1]
    config["zarr"]["path"] = f"{tmpdir}/test_zarr_service_testvol_WRITE.zarr"
    if os.path.exists(config["zarr"]["path"]):
        os.unlink(config["zarr"]["path"])

    # Can't initialize service if file doesn't exist
    with pytest.raises(RuntimeError) as excinfo:
        ZarrVolumeService(config)
    assert 'create-if-necessary' in str(excinfo.value)
    assert not os.path.exists(config["zarr"]["path"])

    config["zarr"]["create-if-necessary"] = True
    config["zarr"]["creation-settings"] = {
        "shape": [*volume.shape][::-1],
        "dtype": str(volume.dtype),
        "chunk-shape": [32, 32, 32],
        "max-scale": 0
    }

    # Write some data
    box = [(30, 40, 50), (50, 60, 70)]
    box = np.array(box)
    subvol = volume[box_to_slicing(*box)]
    service = ZarrVolumeService(config)
    service.write_subvolume(subvol, box[0] + global_offset)

    # Read it back.
    subvol = service.get_subvolume(box + global_offset)
    assert (subvol == volume[box_to_slicing(*box)]).all()

    # Write some out-of-bounds zeros
    oob_box = box.copy()
    oob_box[1, 2] = 500
    subvol = np.zeros(oob_box[1] - oob_box[0], int)
    service.write_subvolume(subvol, oob_box[0] + global_offset)

    # Read it back.
    readback = service.get_subvolume(oob_box + global_offset)
    assert (readback == subvol).all()

    # Try writing something other than zeros -- should fail
    subvol[:, :, -1] = 1
    with pytest.raises(RuntimeError):
        service.write_subvolume(subvol, oob_box[0] + global_offset)
def uncompress_volume(method, encoded_data, dtype, encoded_box_zyx, box_zyx=None):
    """
    Uncompress the given encoded data using the specified scheme.
    If the data was encoded into a box that is larger than the box of interest,
    specify a separate box_zyx for the subvolume of interest.
    """
    if method == 'gzip_labelarray':
        volume = decode_labelarray_volume(encoded_box_zyx, encoded_data)
    elif method == 'lz4':
        shape = encoded_box_zyx[1] - encoded_box_zyx[0]
        buf = lz4.frame.decompress(encoded_data)
        volume = np.frombuffer(buf, dtype).reshape(shape)
    elif method == 'lz4_2x':
        shape = encoded_box_zyx[1] - encoded_box_zyx[0]
        buf = lz4.frame.decompress(encoded_data)
        buf = lz4.frame.decompress(buf)
        volume = np.frombuffer(buf, dtype).reshape(shape)
    else:
        raise ValueError(f"Unknown compression method: {method}")

    if box_zyx is None or (box_zyx == encoded_box_zyx).all():
        return volume
    else:
        assert (box_zyx[0] >= encoded_box_zyx[0]).all() and (box_zyx[1] <= encoded_box_zyx[1]).all(), \
            f"box_zyx ({box_zyx.tolist()}) must be contained within encoded_box_zyx ({encoded_box_zyx.tolist()})"
        vol_box = box_zyx - encoded_box_zyx[0]
        return volume[box_to_slicing(*vol_box)]
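# Hedged usage sketch (not part of the library): round-trips a small volume through the
# 'lz4' scheme of uncompress_volume() above. Assumes numpy (np) and lz4.frame are
# imported as in the surrounding code; the array contents here are arbitrary test data.
def _example_lz4_roundtrip():
    box = np.array([[0, 0, 0], [4, 4, 4]])
    vol = np.random.randint(0, 255, size=(4, 4, 4)).astype(np.uint8)
    encoded = lz4.frame.compress(vol.tobytes())
    roundtrip = uncompress_volume('lz4', encoded, vol.dtype, box)
    assert (roundtrip == vol).all()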
def deserialize_uint64_blocks(compressed_blocks, shape):
    """
    Reconstitute a volume that was serialized with serialize_uint64_blocks(), above.

    NOTE: If the volume is not 64-px aligned, then the output will NOT be C-contiguous.
    """
    if (np.array(shape) % 64).any():
        padding = 64 - ( np.array(shape) % 64 )
        aligned_shape = shape + padding
    else:
        aligned_shape = shape

    aligned_volume = np.empty( aligned_shape, dtype=np.uint64 )
    block_view = view_as_blocks( aligned_volume, (64,64,64) )

    for bi, (zi, yi, xi) in enumerate(np.ndindex(*block_view.shape[:3])):
        compressed_block = compressed_blocks[bi]

        # (See note above regarding recompression with LZ4)
        encoded_block = lz4.frame.decompress( compressed_block )
        block = decode_label_block( encoded_block )
        block_view[zi,yi,xi] = block

    if shape == tuple(aligned_shape):
        volume = aligned_volume
    else:
        # Trim
        volume = np.asarray(aligned_volume[box_to_slicing((0,0,0), shape)], order='C')

    return volume
def write_brick(output_service, scale, brick):
    shape = np.array(brick.volume.shape)
    assert (shape[0:2] == output_service.block_width).all()
    assert shape[2] % output_service.block_width == 0

    # Omit leading/trailing empty blocks
    block_width = output_service.block_width
    assert (np.array(brick.volume.shape) % block_width == 0).all()
    blockwise_view = view_as_blocks( brick.volume, brick.volume.shape[0:2] + (block_width,) )

    # blockwise view has shape (1,1,X/bx, bz, by, bx)
    assert blockwise_view.shape[0:2] == (1,1)
    blockwise_view = blockwise_view[0,0] # drop singleton axes

    block_maxes = blockwise_view.max( axis=(1,2,3) )
    assert block_maxes.ndim == 1

    nonzero_block_indexes = np.nonzero(block_maxes)[0]
    if len(nonzero_block_indexes) == 0:
        return # brick is completely empty

    first_nonzero_block = nonzero_block_indexes[0]
    last_nonzero_block = nonzero_block_indexes[-1]

    nonzero_start = (0, 0, block_width*first_nonzero_block)
    nonzero_stop = ( brick.volume.shape[0:2] + (block_width*(last_nonzero_block+1),) )
    nonzero_subvol = brick.volume[box_to_slicing(nonzero_start, nonzero_stop)]
    nonzero_subvol = np.asarray(nonzero_subvol, order='C')

    output_service.write_subvolume(nonzero_subvol, brick.physical_box[0] + nonzero_start, scale)
def _run_to_dvid(setup, check_scale_0=True):
    template_dir, config, volume, dvid_address, repo_uuid, output_grayscale_name = setup

    yaml = YAML()
    yaml.default_flow_style = False

    # re-dump config in case it's been changed by a specific test
    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        yaml.dump(config, f)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    box_zyx = box_xyz[:, ::-1]

    output_vol = fetch_raw(dvid_address, repo_uuid, output_grayscale_name, box_zyx)
    expected_vol = volume[box_to_slicing(*box_zyx)]

    if check_scale_0:
        assert (output_vol == expected_vol).all(), \
            "Written vol does not match expected"

    return box_zyx, expected_vol
def setup_transposed_volume_service():
    test_dir = tempfile.mkdtemp()
    test_file = f'{test_dir}/scaled-volume-test.h5'
    full_volume = np.random.randint(255, size=(256, 256, 256))
    with h5py.File(test_file, 'w') as f:
        f['volume'] = full_volume

    box_zyx = np.array([[0, 0, 0], [100, 200, 256]])
    box_xyz = box_zyx[:, ::-1]

    RAW_VOLUME_DATA = full_volume[box_to_slicing(*box_zyx)]

    VOLUME_CONFIG = {
        "hdf5": {
            "path": test_file,
            "dataset": "volume"
        },
        "geometry": {
            "bounding-box": box_xyz.tolist(),
            "available-scales": [0]  # Ensure only the first scale is used.
        }
    }

    # First, hdf5 alone
    h5_reader = Hdf5VolumeService(VOLUME_CONFIG)
    assert (h5_reader.bounding_box_zyx == box_zyx).all()
    full_from_h5 = h5_reader.get_subvolume(h5_reader.bounding_box_zyx)
    assert full_from_h5.shape == (*(box_zyx[1] - box_zyx[0]),)
    assert (full_from_h5 == RAW_VOLUME_DATA).all()

    return RAW_VOLUME_DATA, VOLUME_CONFIG, full_from_h5, h5_reader
def test_get_union_mask_for_bodies(self):
    union_mask, box, blocksize = sparkdvid.get_union_block_mask_for_bodies( TEST_DVID_SERVER,
                                                                            self.uuid,
                                                                            self.instance,
                                                                            [1, 2] )
    expected, _ = downsample_binary_3d_suppress_zero(self.labels.astype(bool), 64)

    assert blocksize == (64, 64, 64)
    assert (expected[box_to_slicing(*box)] == union_mask).all()
def check_vol(box_zyx, scale):
    # raw volume handle
    spec = dict(config["tensorstore"]["spec"])
    spec['scale_index'] = scale
    context = ts.Context(config["tensorstore"]["context"])
    store = ts.open(spec, read=True, write=False, context=context).result()
    store_box = np.array([ store.spec().domain.inclusive_min[:3][::-1],
                           store.spec().domain.exclusive_max[:3][::-1] ])

    # Just verify that the 'service' wrapper is consistent with the low-level handle
    assert service.dtype == store.dtype.numpy_dtype
    assert (service.bounding_box_zyx // (2**scale) == store_box).all(), \
        f"{service.bounding_box_zyx.tolist()} != {store_box.tolist()}"

    if scale == 0:
        # Service INSERTS geometry into config if necessary
        assert config["geometry"]["bounding-box"] == store_box[:, ::-1].tolist()

    store_subvol = store[box_to_slicing(*box_zyx[:, ::-1])].read(order='F').result().transpose()
    assert store_subvol.any(), "Volume from raw API is all zeros; this is a bad test"

    subvol = service.get_subvolume(box_zyx, scale)
    assert subvol.any(), "Volume from service is all zeros"
    assert (subvol.shape == (box_zyx[1] - box_zyx[0])).all()
    assert (subvol == store_subvol).all()
def write_subvolume(self, subvolume, offset_zyx, scale=0):
    assert scale == 0
    box = np.array([offset_zyx, offset_zyx])
    box[1] += subvolume.shape
    self.dataset[box_to_slicing(*box)] = subvolume
def write_subvolume(self, subvolume, offset_zyx, scale=0):
    offset_zyx = np.array(offset_zyx)
    offset_zyx -= self._global_offset // (2**scale)
    box = np.array([offset_zyx, offset_zyx + subvolume.shape])

    stored_bounding_box = (self._bounding_box_zyx - self._global_offset) // (2**scale)
    if (box[0] >= 0).all() and (box[1] <= stored_bounding_box[1]).all():
        # Box is fully contained within the Zarr volume bounding box.
        self.zarr_dataset(scale)[box_to_slicing(*box)] = subvolume
    else:
        msg = ( "Box extends beyond Zarr volume bounds (XYZ): "
                f"{box[:, ::-1].tolist()} exceeds {stored_bounding_box[:, ::-1].tolist()}" )

        if self._out_of_bounds_access == 'forbid':
            # Note that this message shows the true zarr storage bounds,
            # and doesn't show the logical bounds according to global_offset (if any).
            msg = "Cannot write subvolume. " + msg
            msg += "\nAdd permit-out-of-bounds to your config to allow such writes,"
            msg += " assuming the out-of-bounds portion is completely empty."
            raise RuntimeError(msg)

        clipped_box = box_intersection(box, stored_bounding_box)

        # If any of the out-of-bounds portion is non-empty, that's an error.
        subvol_copy = subvolume.copy()
        subvol_copy[box_to_slicing(*(clipped_box - box[0]))] = 0
        if self._out_of_bounds_access == 'permit-empty' and subvol_copy.any():
            # Note that this message shows the true zarr storage bounds,
            # and doesn't show the logical bounds according to global_offset (if any).
            msg = ( "Cannot write subvolume. Box extends beyond Zarr volume storage bounds (XYZ): "
                    f"{box[:, ::-1].tolist()} exceeds {stored_bounding_box[:, ::-1].tolist()}\n"
                    "and the out-of-bounds portion is not empty (contains non-zero values).\n" )
            raise RuntimeError(msg)

        logger.warning(msg)
        clipped_subvolume = subvolume[box_to_slicing(*clipped_box - box[0])]
        self.zarr_dataset(scale)[box_to_slicing(*clipped_box)] = clipped_subvolume
def place_test_object(label, corner, height):
    corner = np.array(corner)
    object_vol = create_test_object(height).astype(np.uint64)
    object_vol *= label
    object_box = np.array([corner, corner + object_vol.shape])

    testvol_view = test_volume[box_to_slicing(*object_box)]
    testvol_view[:] = np.where(object_vol, object_vol, testvol_view)
    return object_box, (object_vol != 0).sum()
def get_subvolume(self, box_zyx, scale=0):
    assert scale == 0, "Slice File reader only supports scale 0"
    box_zyx = np.asarray(box_zyx)
    z_offset = box_zyx[0,0]
    yx_box = box_zyx[:,1:]
    output = np.ndarray(shape=(box_zyx[1] - box_zyx[0]), dtype=self.dtype)

    for z in range(*box_zyx[:,0]):
        slice_path = self._slice_fmt.format(z)
        slice_data = np.array( Image.open(slice_path).convert("L") )
        output[z-z_offset] = slice_data[box_to_slicing(*yx_box)]

    return output
def get_subvolume(self, box_zyx, scale=0): """ Extract the subvolume, specified in new (scaled) coordinates from the original volume service, then scale result accordingly before returning it. """ box_zyx = np.asarray(box_zyx) true_scale = scale + self.scale_delta if true_scale in self.original_volume_service.available_scales: # The original source already has the data at the necessary scale. return self.original_volume_service.get_subvolume( box_zyx, true_scale) # Start with the closest scale we've got base_scales = np.array(self.original_volume_service.available_scales) i_best = np.abs(base_scales - true_scale).argmin() best_base_scale = base_scales[i_best] delta_from_best = true_scale - best_base_scale if delta_from_best > 0: orig_box_zyx = box_zyx * 2**delta_from_best orig_data = self.original_volume_service.get_subvolume( orig_box_zyx, best_base_scale) if self.method: #print(f"orig_data.shape: {scale}, {box_zyx}, {orig_box_zyx}, {orig_data.shape}, {delta_from_best}") downsampled_data = downsample(orig_data, 2**delta_from_best, self.method) elif np.dtype(self.dtype) == np.uint64: # Assume that uint64 means labels. ## FIXME: Our C++ method for downsampling ('labels') ## seems to have a bad build at the moment (it segfaults and/or produces zeros) ## For now, we use the 'labels-numba' method downsampled_data = downsample(orig_data, 2**delta_from_best, 'labels-numba') else: downsampled_data = downsample(orig_data, 2**delta_from_best, 'block-mean') return downsampled_data else: upsample_factor = int(2**-delta_from_best) orig_box_zyx = downsample_box(box_zyx, np.array(3 * (upsample_factor, ))) orig_data = self.original_volume_service.get_subvolume( orig_box_zyx, best_base_scale) upsampled_data = upsample(orig_data, upsample_factor) relative_box = box_zyx - upsample_factor * orig_box_zyx[0] requested_data = upsampled_data[box_to_slicing(*relative_box)] # Force contiguous so caller doesn't have to worry about it. return np.asarray(requested_data, order='C')
def test_copygrayscale_from_hdf5_to_slices(disable_auto_retry):
    template_dir = tempfile.mkdtemp(suffix="copygrayscale-from-hdf5-template")

    # Create volume, write to HDF5
    volume = np.random.randint(10, size=TESTVOL_SHAPE, dtype=np.uint8)
    volume_path = f"{template_dir}/volume.h5"
    with h5py.File(volume_path, 'w') as f:
        f['volume'] = volume

    SLICE_FMT = 'slices/{:04d}.png'

    config_text = textwrap.dedent(f"""\
        workflow-name: copygrayscale
        cluster-type: {CLUSTER_TYPE}

        input:
          hdf5:
            path: {volume_path}
            dataset: volume

          geometry:
            message-block-shape: [64,64,256]
            bounding-box: [[0,0,100], [256,200,256]]

          adapters:
            # Enable multi-scale, since otherwise
            # Hdf5VolumeService doesn't support it out-of-the box
            rescale-level: 0

        output:
          slice-files:
            slice-path-format: "{SLICE_FMT}"
            dtype: uint8

        copygrayscale:
          max-pyramid-scale: 0
          slab-depth: 128
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    _execution_dir, workflow = launch_flow(template_dir, 1)
    final_config = workflow.config

    box_xyz = np.array(final_config['input']['geometry']['bounding-box'])
    box_zyx = box_xyz[:, ::-1]

    output_vol = SliceFilesVolumeService(final_config['output']).get_subvolume([[100, 0, 0], [256, 200, 256]])
    expected_vol = volume[box_to_slicing(*box_zyx)]
    assert (output_vol == expected_vol).all(), \
        "Written vol does not match expected"
def _fill_gaps(mask, mask_box, analysis_scale, dilation_radius_s0, dilation_box):
    """
    Fill gaps between segments in the mask by dilating each
    segment and keeping the voxels that were covered by more
    than one dilation.
    """
    # Perform light dilation on the mask to fix gaps in the
    # segmentation due to hot knife seams, downsampling, etc.
    if dilation_radius_s0 == 0:
        return mask

    # We limit the dilation repair to a central box, to avoid joining
    # dendrites that just barely enter the volume in multiple places.
    # We only want to make repairs that aren't near the volume edge.
    dilation_box = box_intersection(mask_box, dilation_box)
    if (dilation_box[1] - dilation_box[0] <= 0).any():
        return mask

    # Perform dilation on each connected component independently,
    # and mark the areas where two dilated components overlap.
    # We'll add those overlapping voxels to the mask, to span
    # small gap defects in the segmentation.
    cc = labelMultiArrayWithBackground((mask != 0).view(np.uint8))
    cc_max = cc.max()
    if cc_max <= 1:
        return mask

    central_box = dilation_box - mask_box[0]
    cc_central = cc[box_to_slicing(*central_box)]

    dilation_radius = dilation_radius_s0 // (2**analysis_scale)
    dilated_intersections = np.zeros(cc_central.shape, bool)
    dilated_all = vigra.filters.multiBinaryDilation((cc_central == 1), dilation_radius)
    for i in range(2, cc_max+1):
        cc_dilated = vigra.filters.multiBinaryDilation((cc_central == i), dilation_radius)
        dilated_intersections[:] |= (dilated_all & cc_dilated)
        dilated_all[:] |= cc_dilated

    # Return a new array; don't modify the original in-place.
    mask = mask.astype(bool, copy=True)
    mask[box_to_slicing(*central_box)] |= dilated_intersections
    return mask.view(np.uint8)
def get_subvolume(self, box_zyx, scale=0):
    box_zyx = np.array(box_zyx)
    assert scale == 0, "Slice File reader only supports scale 0"
    z_offset = box_zyx[0, 0]
    yx_box = box_zyx[:, 1:] - self.slice_corner_yx
    output = np.ndarray(shape=(box_zyx[1] - box_zyx[0]), dtype=self.dtype)

    for z in range(*box_zyx[:, 0]):
        slice_path = self._slice_fmt.format(z)
        slice_data = np.array(Image.open(slice_path).convert("L"))
        output[z - z_offset] = slice_data[box_to_slicing(*yx_box)]

    return output
def clip_to_logical( brick ):
    """
    Truncate the given brick so that its volume does not exceed the bounds of its logical_box.
    (Useful if the brick was originally constructed with a halo.)
    """
    intersection = box_intersection(brick.physical_box, brick.logical_box)
    assert (intersection[1] > intersection[0]).all(), \
        f"physical_box ({brick.physical_box}) does not intersect logical_box ({brick.logical_box})"

    intersection_within_physical = intersection - brick.physical_box[0]
    new_vol = brick.volume[ box_to_slicing(*intersection_within_physical) ]
    return Brick( brick.logical_box, intersection, new_vol )
def test_subvolume_no_scaling(setup_hdf5_service):
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    box = np.array([[13, 15, 20], [100, 101, 91]])
    subvol_from_h5 = full_from_h5[box_to_slicing(*box)].copy('C')

    scaled_reader = ScaledVolumeService(h5_reader, 0)
    subvol_scaled = scaled_reader.get_subvolume(box)

    assert (subvol_scaled.shape == box[1] - box[0]).all()
    assert subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {subvol_from_h5.shape}"
    assert (subvol_scaled == subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous
def test_read(volume_setup):
    config, volume = volume_setup
    global_offset = config["zarr"]["global-offset"][::-1]
    service = ZarrVolumeService(config)
    assert (service.bounding_box_zyx - global_offset == [(0, 0, 0), volume.shape]).all()
    assert service.dtype == volume.dtype

    # Service INSERTS geometry into config if necessary
    assert (config["geometry"]["bounding-box"] == service.bounding_box_zyx[:, ::-1]).all()

    box = np.array([(30, 40, 50), (50, 60, 70)])
    subvol = service.get_subvolume(box + global_offset)
    assert (subvol == volume[box_to_slicing(*box)]).all()

    # Check out-of-bounds read (should be zeros)
    oob_box = box.copy()
    oob_box[1, 2] = 500
    subvol = service.get_subvolume(oob_box + global_offset)

    # In-bounds portion should match
    assert (subvol[box_to_slicing(*box - box[0])] == volume[box_to_slicing(*box)]).all()

    # Everything else should be zeros
    assert (subvol[:, :, 128:] == 0).all()

    #
    # Check sample_labels()
    #
    points = [np.random.randint(d, size=(10,)) for d in volume.shape]
    points = np.transpose(points)
    global_points = points + global_offset
    labels = service.sample_labels(global_points)
    assert (labels == volume[(*points.transpose(),)]).all()
def test_read_slab(read_slices_setup):
    volume, config = read_slices_setup
    box = np.array([(0, 0, 0), volume.shape])

    # Slab from z=64 to z=128
    box[:, 0] = [64, 128]

    slab_from_raw = volume[box_to_slicing(*box)]

    reader = SliceFilesVolumeService(config)
    slab_from_slices = reader.get_subvolume(box)

    assert slab_from_slices.shape == slab_from_raw.shape, \
        f"Wrong shape: Expected {slab_from_raw.shape}, Got {slab_from_slices.shape}"
    assert (slab_from_slices == slab_from_raw).all()
def test_write(volume_setup): config, volume = volume_setup config["hdf5"]["path"] = "/tmp/test_hdf5_service_testvol_WRITE.h5" if os.path.exists(config["hdf5"]["path"]): os.unlink(config["hdf5"]["path"]) # Can't initialize service if file doesn't exist with pytest.raises(RuntimeError) as excinfo: Hdf5VolumeService(config) assert 'writable' in str(excinfo.value) # After setting writable=true, we can initialize the service. assert not os.path.exists(config["hdf5"]["path"]) config["hdf5"]["writable"] = True # Write some data box = [(30,40,50), (50,60,70)] subvol = volume[box_to_slicing(*box)] service = Hdf5VolumeService(config) service.write_subvolume(subvol, box[0]) # Read it back. subvol = service.get_subvolume(box) assert (subvol == volume[box_to_slicing(*box)]).all()
def test_read(volume_setup):
    config, volume = volume_setup
    service = Hdf5VolumeService(config)
    assert (service.bounding_box_zyx == [(0,0,0), volume.shape]).all()
    assert service.dtype == volume.dtype

    # Service INSERTS geometry into config if necessary
    assert config["geometry"]["bounding-box"] == [[0,0,0], list(volume.shape[::-1])]
    assert config["hdf5"]["dtype"] == volume.dtype.name

    box = [(30,40,50), (50,60,70)]
    subvol = service.get_subvolume(box)
    assert (subvol == volume[box_to_slicing(*box)]).all()
def write_brick(output_service, scale, brick):
    # For most outputs, we just write the whole brick.
    if not isinstance(output_service.base_service, DvidVolumeService):
        output_service.write_subvolume(brick.volume, brick.physical_box[0], scale)

    # For dvid outputs, implement a special optimization.
    # We trim empty blocks from the left/right of the brick.
    else:
        # Typically, users will prefer bricks of shape (64,64,N).
        # However, if the bricks are wider than 64, this code still works,
        # but all blocks for a given X must be empty for the brick to be trimmed.
        block_width = output_service.block_width
        assert np.array(brick.volume.shape)[2] % block_width == 0, \
            "Brick X-dimension is not a multiple of the DVID block-shape"

        # Omit leading/trailing empty blocks
        assert (np.array(brick.volume.shape) % block_width == 0).all()
        blockwise_view = view_as_blocks( brick.volume, brick.volume.shape[0:2] + (block_width,) )

        # blockwise view has shape (1,1,X/bx, bz, by, bx)
        assert blockwise_view.shape[0:2] == (1, 1)
        blockwise_view = blockwise_view[0, 0]  # drop singleton axes

        # Compute max in each block to determine the non-empty blocks
        block_maxes = blockwise_view.max(axis=(1, 2, 3))
        assert block_maxes.ndim == 1

        nonzero_block_indexes = np.nonzero(block_maxes)[0]
        if len(nonzero_block_indexes) == 0:
            return  # brick is completely empty

        first_nonzero_block = nonzero_block_indexes[0]
        last_nonzero_block = nonzero_block_indexes[-1]

        nonzero_start = (0, 0, block_width * first_nonzero_block)
        nonzero_stop = (brick.volume.shape[0:2] + (block_width * (last_nonzero_block + 1),))
        nonzero_subvol = brick.volume[box_to_slicing(nonzero_start, nonzero_stop)]
        nonzero_subvol = np.asarray(nonzero_subvol, order='C')

        output_service.write_subvolume(nonzero_subvol, brick.physical_box[0] + nonzero_start, scale)
def get_subvolume(self, box, scale=0):
    req_bytes = 8 * np.prod(box[1] - box[0])
    with self._resource_manager_client.access_context('brainmaps', True, 1, req_bytes):
        if not self._fetch_blockwise:
            return self._brainmaps_client.get_subvolume(box, scale)
        else:
            block_shape = 3 * (self._block_width,)
            subvol = np.zeros(box[1] - box[0], self.dtype)
            for block_box in boxes_from_grid(box, block_shape, clipped=True):
                block = self._brainmaps_client.get_subvolume(block_box, scale)
                outbox = block_box - box[0]
                subvol[box_to_slicing(*outbox)] = block
            return subvol
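# Illustrative sketch (an assumption, not library code): how boxes_from_grid() tiles a
# request box into clipped, grid-aligned blocks, which is the pattern the blockwise fetch
# above relies on. Assumes the default grid offset of (0,0,0) and numpy as np.
def _example_blockwise_tiling():
    box = np.array([[0, 0, 10], [64, 64, 100]])
    for block_box in boxes_from_grid(box, (64, 64, 64), clipped=True):
        # Expected (roughly): [[0, 0, 10], [64, 64, 64]] then [[0, 0, 64], [64, 64, 100]]
        print(np.asarray(block_box).tolist())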
def test_subvolume_downsample_1(setup_hdf5_service):
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    down_box = np.array([[13, 15, 20], [20, 40, 41]])
    up_box = 2 * down_box
    up_subvol_from_h5 = full_from_h5[box_to_slicing(*up_box)]
    down_subvol_from_h5 = downsample(up_subvol_from_h5, 2, 'block-mean')

    # Scale 1
    scaled_reader = ScaledVolumeService(h5_reader, 1)
    subvol_scaled = scaled_reader.get_subvolume(down_box)

    assert (subvol_scaled.shape == down_box[1] - down_box[0]).all()
    assert down_subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {down_subvol_from_h5.shape}"
    assert (subvol_scaled == down_subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous
def test_subvolume_upsample_1(setup_hdf5_service):
    _raw_volume, _volume_config, full_from_h5, h5_reader = setup_hdf5_service

    up_box = np.array([[13, 15, 20], [100, 101, 91]])
    full_upsampled_vol = np.empty(2 * np.array(full_from_h5.shape), dtype=h5_reader.dtype)
    up_view = view_as_blocks(full_upsampled_vol, (2, 2, 2))
    up_view[:] = full_from_h5[:, :, :, None, None, None]
    up_subvol_from_h5 = full_upsampled_vol[box_to_slicing(*up_box)]

    # Scale -1
    scaled_reader = ScaledVolumeService(h5_reader, -1)
    subvol_scaled = scaled_reader.get_subvolume(up_box)

    assert (subvol_scaled.shape == up_box[1] - up_box[0]).all()
    assert up_subvol_from_h5.shape == subvol_scaled.shape, \
        f"{subvol_scaled.shape} != {up_subvol_from_h5.shape}"
    assert (subvol_scaled == up_subvol_from_h5).all()
    assert subvol_scaled.flags.c_contiguous
def get_subvolume(self, box_zyx, scale=0): """ Extract the subvolume, specified in new (scaled) coordinates from the original volume service, then scale result accordingly before returning it. """ true_scale = scale + self.scale_delta if true_scale in self.original_volume_service.available_scales: # The original source already has the data at the necessary scale. return self.original_volume_service.get_subvolume( box_zyx, true_scale ) # Start with the closest scale we've got base_scales = np.array(self.original_volume_service.available_scales) i_best = np.abs(base_scales - true_scale).argmin() best_base_scale = base_scales[i_best] delta_from_best = true_scale - best_base_scale if delta_from_best > 0: orig_box_zyx = box_zyx * 2**delta_from_best orig_data = self.original_volume_service.get_subvolume(orig_box_zyx, best_base_scale) if self.dtype == np.uint64: # Assume that uint64 means labels. downsampled_data, _ = downsample_labels_3d( orig_data, 2**self.scale_delta ) else: downsampled_data = downsample_raw( orig_data, self.scale_delta )[-1] return downsampled_data else: upsample_factor = int(2**-delta_from_best) orig_box_zyx = downsample_box(box_zyx, np.array(3*(upsample_factor,))) orig_data = self.original_volume_service.get_subvolume(orig_box_zyx, best_base_scale) orig_shape = np.array(orig_data.shape) upsampled_data = np.empty( orig_shape * upsample_factor, dtype=self.dtype ) v = view_as_blocks(upsampled_data, 3*(upsample_factor,)) v[:] = orig_data[:,:,:,None, None, None] relative_box = box_zyx - upsample_factor*orig_box_zyx[0] requested_data = upsampled_data[box_to_slicing(*relative_box)] # Force contiguous so caller doesn't have to worry about it. return np.asarray(requested_data, order='C')
def setup_labelmap_test():
    test_dir = tempfile.mkdtemp()
    test_file = f'{test_dir}/mapped-volume-test.h5'
    full_volume = np.random.randint(100, size=(256, 256, 256), dtype=np.uint64)
    with h5py.File(test_file, 'w') as f:
        f['volume'] = full_volume

    box_zyx = np.array([[0, 0, 0], [100, 200, 256]])
    box_xyz = box_zyx[:, ::-1]

    RAW_VOLUME_DATA = full_volume[box_to_slicing(*box_zyx)]

    VOLUME_CONFIG = {
        "hdf5": {
            "path": test_file,
            "dataset": "volume"
        },
        "geometry": {
            "bounding-box": box_xyz.tolist(),
            "available-scales": [0]  # Ensure only the first scale is used.
        }
    }

    # First, hdf5 alone
    h5_reader = Hdf5VolumeService(VOLUME_CONFIG)
    assert (h5_reader.bounding_box_zyx == box_zyx).all()
    full_from_h5 = h5_reader.get_subvolume(h5_reader.bounding_box_zyx)
    assert full_from_h5.shape == (*(box_zyx[1] - box_zyx[0]),)
    assert (full_from_h5 == RAW_VOLUME_DATA).all()

    mapping_path = f'{test_dir}/mapping.csv'
    mapping = pd.DataFrame({
        'orig': np.arange(100),
        'body': np.arange(100) + 1000
    })
    mapping.to_csv(mapping_path, index=False, header=True)

    labelmap_config = {"file": mapping_path, "file-type": "label-to-body"}
    expected_vol = RAW_VOLUME_DATA + 1000

    return RAW_VOLUME_DATA, expected_vol, labelmap_config, full_from_h5, h5_reader
def fetch_supervoxel_mask(server, uuid, instance, sv, max_box_volume):
    """
    Fetch a mask for the given supervoxel.
    The mask will be downloaded at a scale which is chosen such that
    the mask's bounding box will not exceed the given volume.
    """
    coarse_coords = fetch_sparsevol_coarse(server, uuid, instance, sv, supervoxels=True)

    # (Note: sparsevol-coarse is returned at scale 6)
    box = (2**6) * np.array([coarse_coords.min(axis=0), 1 + coarse_coords.max(axis=0)])
    shape = box[1] - box[0]

    # Select a scale
    scale = 0
    while np.prod(shape) > max_box_volume:
        scale += 1
        box //= 2
        shape = box[1] - box[0]

    # Fetch sparse masks
    ns = DVIDNodeService(server, uuid)
    block_coords, block_masks = ns.get_sparselabelmask(sv, instance, scale, supervoxels=True)

    fetched_box = np.array([block_coords.min(axis=0), 64 + block_coords.max(axis=0)])
    fetched_shape = fetched_box[1] - fetched_box[0]

    # Combine sparse masks into a single array
    full_mask = np.zeros(fetched_shape, dtype=bool)
    for coord, mask in zip(block_coords, block_masks):
        mask_box = np.array([coord, coord + 64]) - fetched_box[0]
        full_mask[box_to_slicing(*mask_box)] = mask

    return full_mask, scale, fetched_box
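# Worked example (hypothetical numbers) of the scale-selection loop above: a 512-cube
# sparsevol-coarse bounding box is halved until its voxel count fits under max_box_volume.
# Assumes numpy as np; not part of the original module.
def _example_scale_selection():
    box = np.array([[0, 0, 0], [512, 512, 512]])
    max_box_volume = 10e6
    scale = 0
    while np.prod(box[1] - box[0]) > max_box_volume:
        scale += 1
        box //= 2
    assert scale == 2  # 256**3 ~ 16.8e6 is still too big; 128**3 ~ 2.1e6 fits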
def test_encode_nonaligned_labelarray_volume():
    nonaligned_box = np.array([(520, 1050, 2050), (620, 1150, 2150)])
    nonaligned_vol = np.random.randint(1000, 2000, size=(100, 100, 100), dtype=np.uint64)

    aligned_start = np.array((512, 1024, 2048))
    aligned_box = np.array([aligned_start, aligned_start + 128])
    aligned_vol = np.zeros((128, 128, 128), dtype=np.uint64)
    aligned_vol[box_to_slicing(*(nonaligned_box - aligned_box[0]))] = nonaligned_vol

    encoded_box, encoded_vol = encode_nonaligned_labelarray_volume(nonaligned_box[0], nonaligned_vol)
    inflated = DVIDNodeService.inflate_labelarray_blocks3D_from_raw(encoded_vol, (128, 128, 128), aligned_start)

    assert (encoded_box == aligned_box).all()
    assert (inflated == aligned_vol).all()
def get_subvolume(self, box_zyx, scale=0):
    req_bytes = 8 * np.prod(box_zyx[1] - box_zyx[0])
    try:
        resource_name = self.volume_config['tensorstore']['spec']['kvstore']['bucket']
    except KeyError:
        resource_name = self.volume_config['tensorstore']['spec']['path']

    with self._resource_manager_client.access_context(resource_name, True, 1, req_bytes):
        store = self.store(scale)

        # Tensorstore uses X,Y,Z conventions, so it's best to
        # request a Fortran array and transpose it ourselves.
        box_xyz = box_zyx[:, ::-1]
        vol_xyzc = store[box_to_slicing(*box_xyz)].read(order='F').result()
        vol_xyz = vol_xyzc[..., 0]
        vol_zyx = vol_xyz.transpose()

        assert (vol_zyx.shape == (box_zyx[1] - box_zyx[0])).all(), \
            f"Fetched volume_zyx shape ({vol_zyx.shape}) doesn't match box_zyx {box_zyx.tolist()}"
        return vol_zyx
def serialize_uint64_blocks(volume):
    """
    Compress and serialize a volume of uint64.

    Preconditions:
      - volume.dtype == np.uint64
      - volume.ndim == 3

    NOTE: If volume.shape is NOT divisible by 64, the input will be copied and padded.

    Returns compressed_blocks, where the blocks are a flat list, in scan-order
    """
    assert volume.dtype == np.uint64
    assert volume.ndim == 3

    if (np.array(volume.shape) % 64).any():
        padding = 64 - ( np.array(volume.shape) % 64 )
        aligned_shape = volume.shape + padding
        aligned_volume = np.zeros( aligned_shape, dtype=np.uint64 )
        aligned_volume[box_to_slicing((0,0,0), volume.shape)] = volume
    else:
        aligned_volume = volume

    assert (np.array(aligned_volume.shape) % 64 == 0).all()

    block_view = view_as_blocks( aligned_volume, (64,64,64) )
    compressed_blocks = []
    for zi, yi, xi in np.ndindex(*block_view.shape[:3]):
        block = block_view[zi,yi,xi].copy('C')
        encoded_block = encode_label_block(block)

        # We compress AGAIN, with LZ4, because this seems to provide
        # an additional 2x size reduction for nearly no slowdown.
        compressed_block = lz4.frame.compress( encoded_block )
        compressed_blocks.append( compressed_block )
        del block

    return compressed_blocks
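# Hedged round-trip check for serialize_uint64_blocks() / deserialize_uint64_blocks()
# (both defined above). The deliberately non-64-aligned shape exercises the padding path.
# Assumes numpy as np; this helper is an illustration, not part of the original module.
def _example_uint64_block_roundtrip():
    vol = np.random.randint(0, 10, size=(70, 64, 64), dtype=np.uint64)
    blocks = serialize_uint64_blocks(vol)
    roundtrip = deserialize_uint64_blocks(blocks, vol.shape)
    assert (roundtrip == vol).all()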
def block_stats_from_brick(block_shape, brick):
    """
    Get the count of voxels for each segment (excluding segment 0)
    in each block within the given brick, returned as a DataFrame.

    Returns a DataFrame with the following columns:
        ['segment_id', 'z', 'y', 'x', 'count']

    where z,y,x are the starting coordinates of each block.
    """
    block_grid = Grid(block_shape)

    block_dfs = []
    block_boxes = boxes_from_grid(brick.physical_box, block_grid)
    for box in block_boxes:
        clipped_box = box_intersection(box, brick.physical_box) - brick.physical_box[0]
        block_vol = brick.volume[box_to_slicing(*clipped_box)]
        counts = pd.Series(block_vol.reshape(-1)).value_counts(sort=False)
        segment_ids = counts.index.values
        counts = counts.values.astype(np.uint32)

        box = box.astype(np.int32)

        block_df = pd.DataFrame({
            'segment_id': segment_ids,
            'count': counts,
            'z': box[0][0],
            'y': box[0][1],
            'x': box[0][2]
        })

        # Exclude segment 0 from output
        block_df = block_df[block_df['segment_id'] != 0]

        block_dfs.append(block_df)

    brick_df = pd.concat(block_dfs, ignore_index=True)
    brick_df = brick_df[['segment_id', 'z', 'y', 'x', 'count']]
    assert list(brick_df.columns) == list(BLOCK_STATS_DTYPES.keys())
    return brick_df
def get_subvolume(self, box_zyx, scale=0):
    box_zyx = np.asarray(box_zyx)
    return self.n5_dataset(scale)[box_to_slicing(*box_zyx.tolist())]
def stats_df_from_brick(column_names, brick, exclude_zero=True, exclude_halo=True):
    """
    For a given brick, return a DataFrame of statistics for the segments it contains.

    Args:
        column_names (list):
            Which statistics to compute.
            Anything from COLUMNS_INFO is permitted, except compressed_bytes.
            The 'segment' column must be first in the list.

        brick (Brick):
            The brick to process

        exclude_zero (bool):
            Discard statistics for segment=0.

        exclude_halo (bool):
            Exclude voxels that lie outside the Brick's logical_box.

    Returns:
        pd.DataFrame, with df.columns == column_names
    """
    import pandas as pd
    assert column_names[0] == 'segment'

    volume = brick.volume
    if exclude_halo and (brick.physical_box != brick.logical_box).any():
        internal_box = box_intersection( brick.logical_box, brick.physical_box ) - brick.physical_box[0]
        volume = volume[box_to_slicing(*internal_box)]
        volume = np.asarray(volume, order='C')

    # We always compute segment and voxel_count
    TRIVIAL_COLUMNS = set(['segment', 'voxel_count'])
    counts = pd.Series(volume.ravel('K')).value_counts(sort=False)
    segment_ids = counts.index.values
    assert segment_ids.dtype == volume.dtype

    # Other columns are computed only if needed
    if set(column_names) - TRIVIAL_COLUMNS:
        # Must remap to consecutive segments before calling extractRegionFeatures()
        remapped_ids = np.arange(len(segment_ids), dtype=np.uint32)
        mapper = dvidutils.LabelMapper( segment_ids, remapped_ids )
        remapped_vol = mapper.apply(volume)
        assert remapped_vol.dtype == np.uint32
        remapped_vol = vigra.taggedView( remapped_vol, 'zyx' )

        # Compute (local) bounding boxes.
        acc = vigra.analysis.extractRegionFeatures( np.zeros(remapped_vol.shape, np.float32),
                                                    remapped_vol,
                                                    ["Count", "Coord<Minimum >", "Coord<Maximum >"] )
        assert (acc["Count"] == counts.values).all()

        # Use int64: int32 is dangerous because multiplying them together quickly overflows
        local_bb_starts = acc["Coord<Minimum >"].astype(np.int64)
        local_bb_stops = (1 + acc["Coord<Maximum >"]).astype(np.int64)

        global_bb_starts = local_bb_starts + brick.physical_box[0]
        global_bb_stops = local_bb_stops + brick.physical_box[0]

        if 'block_list' in column_names:
            block_lists = []
            for remapped_id, start, stop in zip(remapped_ids, local_bb_starts, local_bb_stops):
                local_box = np.array((start, stop))
                binary = (remapped_vol[box_to_slicing(*local_box)] == remapped_id)

                # This downsample function respects block-alignment, since we're providing the local_box
                reduced, block_bb = downsample_binary_3d_suppress_zero(binary, BLOCK_WIDTH, local_box)
                local_block_indexes = np.transpose(reduced.nonzero())
                local_block_starts = BLOCK_WIDTH * (block_bb[0] + local_block_indexes)
                global_block_starts = brick.physical_box[0] + local_block_starts
                block_lists.append(global_block_starts)

    # Segment is always first.
    df = pd.DataFrame(columns=column_names)
    df['segment'] = segment_ids

    # Append columns in-order
    for column in column_names:
        if column == 'voxel_count':
            df['voxel_count'] = counts.values

        if column == 'block_list':
            df['block_list'] = block_lists

        if column == 'bounding_box_start':
            df['bounding_box_start'] = list(global_bb_starts)  # Must convert to list or pandas complains about non-1D-data.

        if column == 'bounding_box_stop':
            df['bounding_box_stop'] = list(global_bb_stops)  # ditto

        if column in ('z0', 'y0', 'x0'):
            df[column] = global_bb_starts[:, ('z0', 'y0', 'x0').index(column)]

        if column in ('z1', 'y1', 'x1'):
            df[column] = global_bb_stops[:, ('z1', 'y1', 'x1').index(column)]

        if column == 'compressed_bytes':
            raise RuntimeError("Can't compute compressed_bytes in this function.")

    if exclude_zero:
        df.drop(df.index[df.segment == 0], inplace=True)

    return df