def test_adjust_for_partition():
    """
    Check `TilingScheme.adjust_for_partition` for a partition whose shape
    differs from the tile shape, and for one whose shape is equal to it.
    """
    ds_shape = Shape((16, 16, 32, 32), sig_dims=2)
    tile_shape = Shape((3, 4, 32), sig_dims=2)
    scheme = TilingScheme.make_for_shape(
        tileshape=tile_shape,
        dataset_shape=ds_shape,
        intent="partition",
    )

    def _fake_partition(depth):
        # only the `.slice` attribute of the partition is set up explicitly
        part_slice = Slice(
            origin=(0, 0, 0),
            shape=Shape((depth, 4, 32), sig_dims=2),
        )
        part = mock.MagicMock()
        part.slice = part_slice
        return part

    # mismatch: partition depth 4 vs. tile depth 3 -> a different scheme
    adjusted = scheme.adjust_for_partition(_fake_partition(4))
    assert adjusted != scheme
    assert adjusted.depth == 4

    # match: the very same scheme object is handed back
    adjusted = scheme.adjust_for_partition(_fake_partition(3))
    assert adjusted == scheme
    assert adjusted is scheme
    assert adjusted.depth == 3
def test_roi_to_nd_indices():
    """
    `_roi_to_nd_indices` should list the selected nav coordinates that fall
    into the given partition slice, in the order asserted below.
    """
    # plus-shaped selection centered on (2, 2)
    roi = np.zeros((5, 5), dtype=bool)
    roi[1, 2] = True
    roi[2, 1:4] = True
    roi[3, 2] = True

    # partition covering nav rows 2 and 3 only
    lower_rows = Slice(
        origin=(2, 0, 0, 0),
        shape=Shape((2, 5, 16, 16), sig_dims=2),
    )
    expected_lower = [(2, 1), (2, 2), (2, 3), (3, 2)]
    assert list(_roi_to_nd_indices(roi, lower_rows)) == expected_lower

    # partition covering the whole nav area
    everything = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((5, 5, 16, 16), sig_dims=2),
    )
    expected_all = [(1, 2), (2, 1), (2, 2), (2, 3), (3, 2)]
    assert list(_roi_to_nd_indices(roi, everything)) == expected_all
def _get_subslices_chunked_tiled(self, tiling_scheme, scheme_lookup, nav_dims, tileshape_nd):
    """
    General tiled reading with chunking.

    The outermost loop walks over pieces of the signal dimensions (sized
    like the signal part of the tiling scheme), the next one over nav
    chunks, and the innermost one cuts each combined chunk into tiles of
    shape `tileshape_nd`.

    Yields `(scheme_idx, subslice)` pairs, where `scheme_idx` is looked up
    in `scheme_lookup` by the signal origin and signal shape of the subslice.
    """
    sig_part = self.slice_nd.sig
    nav_part = self.slice_nd.nav
    nav_chunk_shape = self._chunks[:nav_dims]
    sig_tile_shape = tiling_scheme.shape.sig

    sig_pieces = sig_part.subslices(sig_tile_shape)

    logger.debug(
        "_get_subslices_chunked_tiled: chunking first by sig %r, then nav %r, finally %r",
        sig_tile_shape, nav_chunk_shape, tileshape_nd,
    )

    for sig_piece in sig_pieces:
        for nav_piece in nav_part.subslices(shape=nav_chunk_shape):
            # glue nav and sig parts back together into one n-D slice
            combined = Slice(
                origin=nav_piece.origin + sig_piece.origin,
                shape=nav_piece.shape + tuple(sig_piece.shape),
            )
            for tile in combined.subslices(shape=tileshape_nd):
                lookup_key = (tile.origin[nav_dims:], tile.shape[nav_dims:])
                yield scheme_lookup[lookup_key], tile
def test_mask_caching_2():
    """
    Repeated `MaskContainer.get` calls should be served from the cache,
    including a call with a slice of different origin and nav extent.
    """
    factories = [
        lambda: np.ones((128, 128)),
        lambda: np.zeros((128, 128)),
    ]
    masks = MaskContainer(mask_factories=factories, dtype="float32")

    big_shape = Shape((16 * 16, 128, 128), sig_dims=2)
    small_shape = Shape((8 * 16, 128, 128), sig_dims=2)

    def _hits_and_misses(cache_key):
        info = masks._get_masks_for_slice[cache_key].cache_info()
        return info.hits, info.misses

    # first access: nothing cached yet
    current = Slice(origin=(0, 0, 0), shape=big_shape)
    masks.get(current)
    key = (masks.dtype, False, True, 'numpy')
    assert _hits_and_misses(key) == (0, 1)

    # same slice again: cache hit
    masks.get(current)
    assert _hits_and_misses(key) == (1, 1)

    # other origin and nav extent: still a hit
    current = Slice(origin=(1, 0, 0), shape=small_shape)
    masks.get(current)
    assert _hits_and_misses(key) == (2, 1)
def get_tiles(self, crop_to=None, full_frames=False):
    """
    Yield `DataTile` objects for this partition, one per stack of
    `self.tileshape.nav.size` frames.

    Parameters
    ----------
    crop_to
        Optional slice; tiles that don't intersect it are skipped, and if
        its signal shape differs from the tile's, the read buffer takes the
        signal shape and signal origin from it.
    full_frames
        Accepted, but not used by this implementation.

    Note that all yielded tiles share one and the same data buffer.
    """
    frames_per_tile = self.tileshape.nav.size
    tile_count = self.partfile.fields['num_images'] // frames_per_tile

    buf_shape = self.tileshape.flatten_nav()
    sig_origin = (0, 0)
    cropping = crop_to is not None
    if cropping and buf_shape.sig != crop_to.shape.sig:
        buf_shape = Shape(
            tuple(buf_shape.nav) + tuple(crop_to.shape.sig),
            sig_dims=buf_shape.sig.dims,
        )
        sig_origin = crop_to.origin[1:]

    buf = np.ndarray(buf_shape, dtype=self.dtype)

    for tile_no in range(tile_count):
        tile_slice = Slice(
            origin=(tile_no * frames_per_tile + self.slice.origin[0], ) + sig_origin,
            shape=buf_shape,
        )
        if cropping:
            overlap = tile_slice.intersection_with(crop_to)
            if overlap.is_null():
                continue
        self.partfile.read_frames(
            num=frames_per_tile,
            offset=tile_no * frames_per_tile,
            out=buf,
            crop_to=crop_to,
        )
        # sanity checks on the tile slice after shifting it by the
        # partition slice
        assert all(extent > 0 for extent in tile_slice.shift(self.slice).shape)
        assert all(start >= 0 for start in tile_slice.shift(self.slice).origin)
        yield DataTile(data=buf, tile_slice=tile_slice)
def get_partitions(self):
    """
    Yield one `H5Partition` per subslice of the dataset.

    If `self.target_size` is unset, 512 MiB is used when
    `self._compression` is None and 256 MiB otherwise. For data whose
    chunking is not contiguous (as judged by `_have_contig_chunks`), the
    partition shape is taken from `_partition_shape_for_chunking` instead.
    """
    full_shape = Shape(self.shape, sig_dims=self.sig_dims)
    full_slice = Slice(origin=[0] * len(self.shape), shape=full_shape)

    size_hint = self.target_size
    if size_hint is None:
        mebibytes = 512 if self._compression is None else 256
        size_hint = mebibytes * 1024 * 1024

    part_shape = self.partition_shape(
        target_size=size_hint,
        dtype=self.dtype,
    ) + tuple(self.shape.sig)

    # if the data is chunked in the navigation axes, choose a compatible
    # partition size (even important for non-compressed data!)
    chunking = self._chunks
    if chunking is not None and not _have_contig_chunks(chunking, full_shape):
        part_shape = _partition_shape_for_chunking(chunking, full_shape)

    for nd_slice in full_slice.subslices(part_shape):
        yield H5Partition(
            meta=self._meta,
            reader=self.get_reader(),
            partition_slice=nd_slice.flatten_nav(self.shape),
            slice_nd=nd_slice,
            io_backend=self.get_io_backend(),
            chunks=self._chunks,
            decoder=None,
        )
def _read_full_frames(self, crop_to=None, dest_dtype="float32", roi=None):
    """
    Yield one `DataTile` per frame of this partition, assembled from all
    sectors in `self._sectors`.

    Parameters
    ----------
    crop_to
        Optional slice; frames whose tile slice doesn't intersect it are
        skipped.
    dest_dtype
        dtype of the frame buffer.
    roi
        Optional mask, flattened here. Frames with a falsy entry are
        skipped, and tile origins are then counted in selected frames
        instead of absolute frame indices.

    All yielded tiles share a single `(1, 1860, 2048)` buffer.
    """
    with contextlib.ExitStack() as stack:
        frame_buf = zeros_aligned((1, 1860, 2048), dtype=dest_dtype)
        sectors = [stack.enter_context(sector) for sector in self._sectors]

        have_roi = roi is not None
        frame_offset = 0
        if have_roi:
            roi = roi.reshape((-1, ))
            # number of selected frames in front of this partition
            frame_offset = np.count_nonzero(roi[:self._start_frame])

        frames_read = 0
        for frame in range(self._start_frame, self._start_frame + self._num_frames):
            if have_roi:
                if not roi[frame]:
                    continue
                nav_origin = frame_offset + frames_read
            else:
                nav_origin = frame

            tile_slice = Slice(
                origin=(nav_origin, 0, 0),
                shape=Shape(frame_buf.shape, sig_dims=2),
            )
            if crop_to is not None and tile_slice.intersection_with(crop_to).is_null():
                continue

            for s in sectors:
                # each sector fills its own column range of the frame
                first_col = s.idx * SECTOR_SIZE[1]
                last_col = (s.idx + 1) * SECTOR_SIZE[1]
                s.read_full_frame(
                    frame=frame,
                    buf=frame_buf[:, :, first_col:last_col],
                )
            yield DataTile(data=frame_buf, tile_slice=tile_slice)
            frames_read += 1
def test_mask_caching_2():
    """
    Repeated indexing of a `MaskContainer` should be served from the cache,
    including indexing with a slice of different origin and nav extent.
    """
    factories = [
        lambda: np.ones((128, 128)),
        lambda: np.zeros((128, 128)),
    ]
    masks = MaskContainer(mask_factories=factories, dtype="float32")

    big_shape = Shape((16, 16, 128, 128), sig_dims=2)
    small_shape = Shape((8, 16, 128, 128), sig_dims=2)

    def _hits_and_misses():
        info = masks._get_masks_for_slice.cache_info()
        return info.hits, info.misses

    # first access: nothing cached yet
    current = Slice(origin=(0, 0, 0, 0), shape=big_shape)
    masks[current]
    assert _hits_and_misses() == (0, 1)

    # same slice again: cache hit
    masks[current]
    assert _hits_and_misses() == (1, 1)

    # other origin and nav extent: still a hit
    current = Slice(origin=(0, 1, 0, 0), shape=small_shape)
    masks[current]
    assert _hits_and_misses() == (2, 1)
def apply(self, data: np.ndarray, tile_slice: Slice):
    """
    Apply corrections in-place to `data`, cropping the correction data
    to the `tile_slice`.

    Does nothing if `self.have_corrections()` is falsy.
    """
    dark = self.get_dark_frame()
    gain = self.get_gain_map()

    if not self.have_corrections():
        return

    # crop the correction data to the signal part of the tile
    sig_crop = tile_slice.get(sig_only=True)
    dark = None if dark is None else dark[sig_crop]
    gain = None if gain is None else gain[sig_crop]

    descriptor = self.repair_descriptor(tile_slice.discard_nav())
    correct(
        buffer=data,
        dark_image=dark,
        gain_map=gain,
        repair_descriptor=descriptor,
        inplace=True,
        sig_shape=tuple(tile_slice.shape.sig),
        allow_empty=self._allow_empty,
    )
def test_get_signal_only():
    """`Slice.get(sig_only=True)` returns only the signal `slice` objects."""
    unit_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    expected = (slice(0, 1), ) * 2
    assert unit_slice.get(sig_only=True) == expected
def test_shift_2():
    """Shifting a slice by another one yields the expected relative origin."""
    inner = Slice(
        origin=(2, 2, 0, 0),
        shape=Shape((1, 1, 2, 2), sig_dims=2),
    )
    reference = Slice(
        origin=(1, 1, 0, 0),
        shape=Shape((1, 1, 4, 4), sig_dims=2),
    )
    relative = inner.shift(reference)
    assert relative.origin == (1, 1, 0, 0)
def test_from_shape():
    """`Slice.from_shape` gives a slice of that shape with an all-zero origin."""
    expected = Slice(
        origin=(0, 0, 0),
        shape=Shape((1, 16, 16), sig_dims=2),
    )
    built = Slice.from_shape((1, 16, 16), sig_dims=2)
    assert built == expected
def test_subslices_non_even_division_2():
    """
    Splitting an extent of 3 into pieces of 2 gives one full piece and
    one remainder piece.
    """
    whole = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((3, 1, 1, 1), sig_dims=2),
    )
    # (start, extent) along the first axis for each expected piece
    expected = [
        Slice(origin=(start, 0, 0, 0), shape=Shape((extent, 1, 1, 1), sig_dims=2))
        for start, extent in [(0, 2), (2, 1)]
    ]
    assert list(whole.subslices(shape=(2, 1, 1, 1))) == expected
def test_get_slice_stack_nav_only():
    """`Slice.get(arr, nav_only=True)` only slices the navigation axes."""
    unit_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    data = np.arange(4 ** 4).reshape(4, 4, 4, 4)

    cut = unit_slice.get(data, nav_only=True)
    assert cut.shape[0:2] == tuple(unit_slice.shape.nav)

    cut = unit_slice.get(data, nav_only=True)
    assert np.all(cut == data[0:1, 0:1])
def test_get_slice_stack_signal_only():
    """`Slice.get(arr, sig_only=True)` only slices the signal axes."""
    unit_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    data = np.arange(4 ** 4).reshape(4, 4, 4, 4)

    cut = unit_slice.get(data, sig_only=True)
    assert cut.shape[2:4] == tuple(unit_slice.shape.sig)

    cut = unit_slice.get(data, sig_only=True)
    assert np.all(cut == data[..., 0:1, 0:1])
def test_get():
    """`Slice.get()` without arguments returns one `slice` per dimension."""
    unit_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    expected = (slice(0, 1), ) * 4
    assert unit_slice.get() == expected
def test_flatten_nav_2():
    """Two nav rows of 16 frames each flatten into 32 frames."""
    nd_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((2, 16, 16, 16), sig_dims=2),
    )
    expected = Slice(
        origin=(0, 0, 0),
        shape=Shape((32, 16, 16), sig_dims=2),
    )
    containing_shape = (16, 16, 16, 16)
    assert nd_slice.flatten_nav(containing_shape) == expected
def test_slice_intersect_0():
    """Intersecting with a contained slice of equal origin gives that slice."""
    outer = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((2, 2, 2, 2), sig_dims=2),
    )
    inner = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    overlap = outer.intersection_with(inner)
    assert overlap == inner
def get_partitions(self):
    """
    Yield one `MemoryPartition` per subslice of shape
    `self.partition_shape` of the whole (raw-shaped) dataset.
    """
    zero_origin = (0, ) * self.raw_shape.dims
    whole = Slice(origin=zero_origin, shape=self.raw_shape)
    for part_slice in whole.subslices(self.partition_shape):
        yield MemoryPartition(
            tileshape=self.tileshape,
            meta=self._meta,
            reader=self.get_reader(),
            partition_slice=part_slice,
        )
def test_flatten_nav():
    """Flattening the nav axes of a unit slice gives a 3D unit slice."""
    nd_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    expected = Slice(
        origin=(0, 0, 0),
        shape=Shape((1, 1, 1), sig_dims=2),
    )
    containing_shape = (1, 1, 1, 1)
    assert nd_slice.flatten_nav(containing_shape) == expected
def _get_tiles_with_roi(self, crop_to, full_frames, dest_dtype, roi):
    """
    With a ROI, we yield tiles from a "compressed" navigation axis, relative
    to the beginning of the partition. Compressed means, only frames that
    have a 1 in the ROI are considered, and the resulting tile slices are
    from a coordinate system that has the shape `(np.count_nonzero(roi),)`.

    Parameters
    ----------
    crop_to
        Optional slice; if given, its signal origin and signal shape are
        used for the tiles, and it is passed on to the file reader.
    full_frames
        If truthy, the signal shape of the dataset is used, even if
        `crop_to` is given.
    dest_dtype
        dtype of the tile buffer.
    roi
        Selection of frames, converted to frame indices by
        `_roi_to_indices`.

    Yields
    ------
    DataTile
        Stacks of up to `stackheight` selected frames. The tile data is
        sliced from one reused buffer, so (NOTE(review): presumably a view)
        it should be consumed before the generator is advanced.
    """
    sig_shape = self.meta.shape.sig
    sig_origin = tuple([0] * len(sig_shape))
    if crop_to is not None:
        sig_origin = tuple(crop_to.origin[-sig_shape.dims:])
        sig_shape = crop_to.shape.sig
    if full_frames:
        sig_shape = self.meta.shape.sig

    stackheight = self._get_stackheight(sig_shape=sig_shape, dest_dtype=dest_dtype)
    tile_buf = zeros_aligned((stackheight, ) + tuple(sig_shape), dtype=dest_dtype)
    single_frame_shape = (1, ) + tuple(sig_shape)

    def _make_tile(nav_origin, depth):
        # a tile of `depth` frames that starts at `nav_origin` on the
        # compressed navigation axis
        tile_slice = Slice(
            origin=(nav_origin, ) + sig_origin,
            shape=Shape((depth, ) + tuple(sig_shape), sig_dims=sig_shape.dims),
        )
        return DataTile(data=tile_buf[:depth, ...], tile_slice=tile_slice)

    frames_read = 0  # selected frames read so far
    tile_idx = 0     # fill level of the current stack
    outer_frame = 0  # compressed nav origin of the current stack

    indices = _roi_to_indices(roi, self._start_frame,
                              self._start_frame + self._num_frames)

    with self._fileset as fileset:
        for frame_idx in indices:
            fileset.read_images_multifile(
                start=frame_idx,
                stop=frame_idx + 1,
                out=tile_buf[tile_idx].reshape(single_frame_shape),
                crop_to=crop_to,
            )
            tile_idx += 1
            frames_read += 1

            if tile_idx == stackheight:
                yield _make_tile(outer_frame, tile_idx)
                tile_idx = 0
                outer_frame = frames_read

    if tile_idx != 0:
        # last stack, size != stackheight; its frames are already in the
        # buffer, so no further reads are needed
        yield _make_tile(outer_frame, tile_idx)
def test_get_slice_2():
    """`Slice.get(arr)` cuts the expected block out of a 4D array."""
    block_slice = Slice(
        origin=(1, 1, 1, 1),
        shape=Shape((1, 1, 2, 2), sig_dims=2),
    )
    data = np.arange(4 ** 4).reshape(4, 4, 4, 4)

    cut = block_slice.get(data)
    assert cut.shape == tuple(block_slice.shape)

    expected = np.array([[[
        [85, 86],
        [89, 90],
    ]]])
    cut = block_slice.get(data)
    assert np.all(cut == expected)
def test_get_slice_1():
    """`Slice.get()` of a 4x4x4x4 slice at the origin is four `slice(0, 4)`."""
    cube = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((4, 4, 4, 4), sig_dims=2),
    )
    expected = (slice(0, 4), ) * 4
    assert cube.get() == expected
def test_slice_intersect_3():
    """Intersecting with a slice that fully contains us gives ourselves."""
    inner = Slice(
        origin=(1, 1, 1, 1),
        shape=Shape((2, 2, 2, 2), sig_dims=2),
    )
    outer = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((4, 4, 4, 4), sig_dims=2),
    )
    overlap = inner.intersection_with(outer)
    assert overlap == inner
def get_partitions(self):
    """
    Yield one `H5Partition` per subslice of the dataset.

    The partition shape is the result of `self.partition_shape(...)`
    (NOTE(review): presumably the navigation part) followed by the full
    signal shape.
    """
    full_shape = Shape(self.shape, sig_dims=self.sig_dims)
    full_slice = Slice(origin=[0] * len(self.shape), shape=full_shape)

    leading_part = self.partition_shape(
        target_size=self.target_size,
        dtype=self.dtype,
    )
    part_shape = leading_part + tuple(self.shape.sig)

    for nd_slice in full_slice.subslices(part_shape):
        yield H5Partition(
            meta=self._meta,
            reader=self.get_reader(),
            partition_slice=nd_slice.flatten_nav(self.shape),
            slice_nd=nd_slice,
        )
def _get_tiles_normal(self, crop_to, full_frames, dest_dtype, target_size=None):
    """
    Yield `DataTile` objects for all frames of this partition, in stacks of
    up to `stackheight` frames.

    Parameters
    ----------
    crop_to
        Optional slice; if given, its signal origin and signal shape are
        used for the tiles, tiles that don't intersect it are skipped, and
        it is passed on to the file reader.
    full_frames
        If truthy, the signal shape of the dataset is used, even if
        `crop_to` is given.
    dest_dtype
        dtype of the tile buffers.
    target_size
        Passed through to `self._get_stackheight`.

    Full-height tiles share one buffer; a shorter trailing tile gets a
    buffer of its own.
    """
    first_frame = self._start_frame
    stop_frame = first_frame + self._num_frames

    sig_shape = self.meta.shape.sig
    sig_origin = (0, ) * len(sig_shape)
    if crop_to is not None:
        sig_origin = tuple(crop_to.origin[-sig_shape.dims:])
        sig_shape = crop_to.shape.sig
    if full_frames:
        sig_shape = self.meta.shape.sig

    stackheight = self._get_stackheight(
        sig_shape=sig_shape, dest_dtype=dest_dtype, target_size=target_size,
    )
    full_buf = zeros_aligned((stackheight, ) + tuple(sig_shape), dtype=dest_dtype)
    full_tileshape = (stackheight, ) + tuple(sig_shape)

    with self._fileset as fileset:
        for tile_start in range(first_frame, stop_frame, stackheight):
            frames_left = stop_frame - tile_start
            if frames_left < stackheight:
                # trailing tile that is shorter than the others
                depth = frames_left
                cur_tileshape = (depth, ) + tuple(sig_shape)
                buf = zeros_aligned(cur_tileshape, dtype=dest_dtype)
            else:
                depth = stackheight
                cur_tileshape = full_tileshape
                buf = full_buf

            tile_slice = Slice(
                origin=(tile_start, ) + sig_origin,
                shape=Shape(cur_tileshape, sig_dims=sig_shape.dims),
            )
            if crop_to is not None and tile_slice.intersection_with(crop_to).is_null():
                continue

            fileset.read_images_multifile(
                start=tile_start,
                stop=tile_start + depth,
                out=buf,
                crop_to=crop_to,
            )
            yield DataTile(data=buf, tile_slice=tile_slice)
def test_slice_intersect_2():
    """Slices that only touch at a corner have a null intersection."""
    far = Slice(
        origin=(1, 1, 1, 1),
        shape=Shape((2, 2, 2, 2), sig_dims=2),
    )
    near = Slice(
        origin=(0, 0, 0, 0),
        shape=Shape((1, 1, 1, 1), sig_dims=2),
    )
    expected = Slice(
        origin=(1, 1, 1, 1),
        shape=Shape((0, 0, 0, 0), sig_dims=2),
    )
    overlap = far.intersection_with(near)
    assert overlap == expected
    assert overlap.is_null()
def get_partitions(self):
    """
    Yield one `BloPartition` per subslice of the 4D dataset, with the
    partition shape computed for a target size of 256 MiB.
    """
    whole = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    part_shape = self.partition_shape(
        datashape=self.shape,
        # number of pixels in a frame: product of the last two extents
        framesize=self.shape[2] * self.shape[3],
        dtype=self.dtype,
        target_size=256*1024*1024,
    )
    for part_slice in whole.subslices(part_shape):
        yield BloPartition(
            tileshape=self._tileshape,
            meta=self._meta,
            reader=self.get_reader(),
            partition_slice=part_slice,
        )
def get_macrotile(self, dest_dtype="float32", roi=None):
    '''
    Return a single tile for the entire partition.

    This is useful to support process_partition() in UDFs and to construct
    dask arrays from datasets.

    Parameters
    ----------
    dest_dtype
        Passed on to `get_tiles`; also the dtype of the empty fallback tile.
    roi
        Passed on to `get_tiles`.

    Returns
    -------
    DataTile
        The first tile from `get_tiles` for a tiling scheme made from the
        partition shape. If `get_tiles` yields nothing, a tile with zero
        extent in the navigation dimension is returned instead.
    '''
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=self.shape,
        dataset_shape=self.meta.shape,
    )

    try:
        return next(
            self.get_tiles(
                tiling_scheme=tiling_scheme,
                dest_dtype=dest_dtype,
                roi=roi,
            ))
    except StopIteration:
        # nothing to read: build an empty tile. The signal dimensionality
        # is taken from the partition slice instead of assuming 2D data.
        sig_shape = self.slice.shape.sig
        sig_dims = sig_shape.dims
        tile_slice = Slice(
            origin=(self.slice.origin[0], ) + (0, ) * sig_dims,
            shape=Shape((0, ) + tuple(sig_shape), sig_dims=sig_dims),
        )
        return DataTile(
            np.zeros(tile_slice.shape, dtype=dest_dtype),
            tile_slice=tile_slice,
            scheme_idx=0,
        )
def _get_tiles_with_roi(self, roi, dest_dtype):
    """
    Yield one single-frame `DataTile` per nav position that
    `_roi_to_nd_indices` returns for this partition.

    Tile origins count selected frames: the first tile starts at the
    number of selected frames in front of this partition. `dest_dtype` is
    accepted, but not used here.
    """
    sig_shape = self.meta.shape.sig

    roi_flat = roi.reshape((-1, ))
    roi_nd = roi.reshape(self.meta.shape.nav)

    frame_shape = Shape((1, ) + tuple(sig_shape), sig_dims=sig_shape.dims)
    sig_origin = (0, ) * sig_shape.dims

    # number of selected frames in front of this partition
    partition_start = self.slice.origin[0]
    frame_offset = np.count_nonzero(roi_flat[:partition_start])

    nd_indices = _roi_to_nd_indices(roi_nd, self.slice_nd)

    with self.reader.get_h5ds() as h5ds:
        for frames_read, nd_idx in enumerate(nd_indices):
            tile_slice = Slice(
                origin=(frames_read + frame_offset, ) + sig_origin,
                shape=frame_shape,
            )
            frame = h5ds[nd_idx].reshape(frame_shape)
            self._preprocess(frame, tile_slice)
            yield DataTile(
                frame,
                tile_slice=tile_slice,
                # there is only a single slice in the tiling scheme, so our
                # scheme_idx is constant 0
                scheme_idx=0,
            )