class LocalFSMMapBackend(IOBackend):
    def __init__(self, decoder=None, corrections: CorrectionSet = None):
        self._decoder = decoder
        self._corrections = corrections
        self._buffer_pool = BufferPool()

    def need_copy(self, roi, native_dtype, read_dtype, tiling_scheme=None, fileset=None):
        # checking conditions in which "straight mmap" is not possible
        # straight mmap means our dataset can just return views into the
        # underlying mmap object as tiles and use them as they are in the UDFs

        # 1) if a roi is given, straight mmap doesn't work because there are
        #    gaps in the navigation axis:
        if roi is not None:
            log.debug("have roi, need copy")
            return True

        # 2) if we need to decode data, or do dtype conversion, we can't
        #    return views into the underlying file:
        if self._need_decode(native_dtype, read_dtype):
            log.debug("have decode, need copy")
            return True

        # 3) if we have fewer frames per file than the tile depth, we need to copy, too
        if tiling_scheme and fileset:
            fileset_arr = fileset.get_as_arr()
            if np.min(fileset_arr[:, 1] - fileset_arr[:, 0]) < tiling_scheme.depth:
                log.debug("too large for fileset, need copy")
                return True

        # 4) if we apply corrections, we need to copy
        if self._corrections is not None and self._corrections.have_corrections():
            log.debug("have corrections, need copy")
            return True

        return False

    def _need_decode(self, native_dtype, read_dtype):
        # FIXME: even with dtype "mismatch", we can possibly do dtype
        # conversion, if the tile size is small enough! maybe benchmark this
        # vs. _get_tiles_w_copy?
        if native_dtype != read_dtype:
            return True
        if self._decoder is not None:
            return True
        return False

    def _get_tiles_straight(self, tiling_scheme, fileset, read_ranges):
        """
        Parameters
        ----------

        fileset : FileSet
            To ensure best performance, should be limited to the files that
            are part of the current partition (otherwise we will spend more
            time finding the right file for a given frame index)

        read_ranges : Tuple[np.ndarray, np.ndarray, np.ndarray]
            As returned by `get_read_ranges`
        """
        ds_sig_shape = tiling_scheme.dataset_shape.sig
        sig_dims = tiling_scheme.shape.sig.dims
        slices, ranges, scheme_indices = read_ranges

        for idx in range(slices.shape[0]):
            origin, shape = slices[idx]
            tile_ranges = ranges[idx]
            scheme_idx = scheme_indices[idx]

            # FIXME: for straight mmap, read_ranges must not contain tiles
            # that span multiple files!
            file_idx = tile_ranges[0][0]
            fh = fileset[file_idx]
            memmap = fh.mmap().reshape((fh.num_frames,) + tuple(ds_sig_shape))
            tile_slice = Slice(
                origin=origin,
                shape=Shape(shape, sig_dims=sig_dims),
            )
            data_slice = (
                slice(
                    origin[0] - fh.start_idx,
                    origin[0] - fh.start_idx + shape[0]
                ),
            ) + tuple(
                slice(o, o + s)
                for (o, s) in zip(origin[1:], shape[1:])
            )
            data = memmap[data_slice]
            yield DataTile(
                data,
                tile_slice=tile_slice,
                scheme_idx=scheme_idx,
            )

    def get_read_and_decode(self, decode):
        key = (decode,)
        if key in _r_n_d_cache:
            return _r_n_d_cache[key]
        r_n_d = _make_mmap_reader_and_decoder(decode=decode)
        _r_n_d_cache[key] = r_n_d
        return r_n_d

    def preprocess(self, data, tile_slice):
        if self._corrections is None:
            return
        self._corrections.apply(data, tile_slice)

    def _get_tiles_w_copy(self, tiling_scheme, fileset, read_ranges, read_dtype, native_dtype):
        if self._decoder is not None:
            decoder = self._decoder
        else:
            decoder = DtypeConversionDecoder()
        decode = decoder.get_decode(
            native_dtype=np.dtype(native_dtype),
            read_dtype=np.dtype(read_dtype),
        )
        r_n_d = self._r_n_d = self.get_read_and_decode(decode)
        native_dtype = decoder.get_native_dtype(native_dtype, read_dtype)

        mmaps = List()
        for fh in fileset:
            mmaps.append(np.frombuffer(fh.raw_mmap(), dtype=np.uint8))

        sig_dims = tiling_scheme.shape.sig.dims
        ds_shape = np.array(tiling_scheme.dataset_shape)

        largest_slice = sorted([
            (np.prod(s_.shape), s_)
            for _, s_ in tiling_scheme.slices
        ], key=lambda x: x[0], reverse=True)[0][1]

        buf_shape = (tiling_scheme.depth,) + tuple(largest_slice.shape)
        need_clear = decoder.do_clear()

        with self._buffer_pool.empty(buf_shape, dtype=read_dtype) as out_decoded:
            out_decoded = out_decoded.reshape((-1,))
            slices = read_ranges[0]
            ranges = read_ranges[1]
            scheme_indices = read_ranges[2]
            for idx in range(slices.shape[0]):
                origin, shape = slices[idx]
                tile_ranges = ranges[idx]
                scheme_idx = scheme_indices[idx]
                out_cut = out_decoded[:np.prod(shape)].reshape((shape[0], -1))
                data = r_n_d(
                    idx,
                    mmaps,
                    sig_dims,
                    tile_ranges,
                    out_cut,
                    native_dtype,
                    do_zero=need_clear,
                    origin=origin,
                    shape=shape,
                    ds_shape=ds_shape,
                )
                tile_slice = Slice(
                    origin=origin,
                    shape=Shape(shape, sig_dims=sig_dims),
                )
                data = data.reshape(shape)
                self.preprocess(data, tile_slice)
                yield DataTile(
                    data,
                    tile_slice=tile_slice,
                    scheme_idx=scheme_idx,
                )

    def get_tiles(self, tiling_scheme, fileset, read_ranges, roi, native_dtype, read_dtype):
        # TODO: how would compression work?
        # TODO: sparse input data? COO format? fill rate? → own pipeline! → later!
        #       strategy: assume low (20%?) fill rate, read whole partition and apply ROI in-memory
        #       partitioning when opening the dataset, or by having one file per partition
        with fileset:
            self._set_readahead_hints(roi, fileset)
            if not self.need_copy(
                tiling_scheme=tiling_scheme,
                fileset=fileset,
                roi=roi,
                native_dtype=native_dtype,
                read_dtype=read_dtype,
            ):
                yield from self._get_tiles_straight(
                    tiling_scheme, fileset, read_ranges
                )
            else:
                yield from self._get_tiles_w_copy(
                    tiling_scheme=tiling_scheme,
                    fileset=fileset,
                    read_ranges=read_ranges,
                    read_dtype=read_dtype,
                    native_dtype=native_dtype,
                )

    def _set_readahead_hints(self, roi, fileset):
        if not hasattr(os, 'posix_fadvise'):
            return
        if any(f.fileno() is None for f in fileset):
            return
        if roi is None:
            for f in fileset:
                os.posix_fadvise(
                    f.fileno(),
                    0, 0,
                    os.POSIX_FADV_SEQUENTIAL | os.POSIX_FADV_WILLNEED,
                )
        else:
            for f in fileset:
                os.posix_fadvise(
                    f.fileno(),
                    0, 0,
                    os.POSIX_FADV_RANDOM | os.POSIX_FADV_WILLNEED,
                )
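

# A minimal, self-contained sketch of the "straight mmap" idea used by
# LocalFSMMapBackend._get_tiles_straight above: memory-map a raw file, reshape
# to (num_frames, *sig_shape), and hand out *views* instead of copies. This is
# not part of the backend; the file contents, shapes and dtype are made up for
# illustration only.
def straight_mmap_demo(num_frames=16, sig_shape=(32, 32)):
    import mmap
    import tempfile
    import numpy as np

    # create a small raw file to map (stand-in for a detector data file):
    data = np.random.rand(num_frames, *sig_shape).astype(np.float32)
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(data.tobytes())
        path = f.name

    with open(path, "rb") as f:
        raw = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        frames = np.frombuffer(raw, dtype=np.float32).reshape(
            (num_frames,) + sig_shape
        )
        # a "tile" of depth 4, starting at frame 8, covering the top half of
        # each frame; this indexes into the page cache, no copy is made:
        tile = frames[8:12, 0:16, :]
        assert tile.base is not None  # it's a view, not a copy
        return float(np.sum(tile))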
class BufferedBackendImpl(IOBackendImpl):
    def __init__(self, max_buffer_size):
        super().__init__()
        self._max_buffer_size = max_buffer_size
        self._buffer_pool = BufferPool()

    def need_copy(
        self, decoder, roi, native_dtype, read_dtype,
        tiling_scheme=None, fileset=None, sync_offset=0, corrections=None,
    ):
        return True  # we always copy in this backend

    def get_read_and_decode(self, decode):
        key = (decode, "read")
        if key in _r_n_d_cache:
            return _r_n_d_cache[key]
        r_n_d = _make_buffered_reader_and_decoder(decode=decode)
        _r_n_d_cache[key] = r_n_d
        return r_n_d

    def get_max_io_size(self):
        return self._max_buffer_size

    def _get_tiles_by_block(
        self, tiling_scheme, fileset, read_ranges, read_dtype, native_dtype,
        decoder=None, corrections=None, sync_offset=0,
    ):
        if decoder is None:
            decoder = DtypeConversionDecoder()
        decode = decoder.get_decode(
            native_dtype=np.dtype(native_dtype),
            read_dtype=np.dtype(read_dtype),
        )
        r_n_d = self._r_n_d = self.get_read_and_decode(decode)
        native_dtype = decoder.get_native_dtype(native_dtype, read_dtype)

        sig_dims = tiling_scheme.shape.sig.dims
        ds_shape = np.array(tiling_scheme.dataset_shape)

        largest_slice = sorted([
            (np.prod(s_.shape), s_)
            for _, s_ in tiling_scheme.slices
        ], key=lambda x: x[0], reverse=True)[0][1]

        buf_shape = (tiling_scheme.depth,) + tuple(largest_slice.shape)
        need_clear = decoder.do_clear()

        slices = read_ranges[0]
        shape_prods = np.prod(slices[..., 1, :], axis=1)
        ranges = read_ranges[1]
        scheme_indices = read_ranges[2]

        tile_block_size = len(tiling_scheme)

        with self._buffer_pool.empty(buf_shape, dtype=read_dtype) as out_decoded:
            out_decoded = out_decoded.reshape((-1,))
            for block_idx in range(0, slices.shape[0], tile_block_size):
                block_ranges = ranges[block_idx:block_idx + tile_block_size]

                fill_factor, req_buf_size, min_per_file, max_per_file = block_get_min_fill_factor(
                    block_ranges
                )
                # TODO: if it makes sense, implement sparse variant
                # if req_buf_size > self._max_buffer_size or fill_factor < self._sparse_threshold:

                yield from self._read_block_dense(
                    block_idx, tile_block_size, min_per_file, max_per_file,
                    fileset, slices, ranges, scheme_indices, shape_prods,
                    out_decoded, r_n_d, sig_dims, ds_shape, need_clear,
                    native_dtype, corrections,
                )

    def _read_block_dense(
        self, block_idx, tile_block_size, min_per_file, max_per_file, fileset,
        slices, ranges, scheme_indices, shape_prods, out_decoded, r_n_d,
        sig_dims, ds_shape, need_clear, native_dtype, corrections,
    ):
        """
        Reads a block of tiles, starting at `block_idx`, having a size of
        `tile_block_size` read range entries.
        """
        # phase 1: read
        buffers = Dict()
        for fileno in min_per_file.keys():
            fh = fileset[fileno]
            read_size = max_per_file[fileno] - min_per_file[fileno]
            # FIXME: re-use buffers
            buffers[fileno] = np.zeros(read_size, dtype=np.uint8)
            # FIXME: file header offset handling is a bit weird
            # FIXME: maybe file header offset should be folded into the read ranges instead?
            fh.seek(min_per_file[fileno] + fh._file_header)
            fh.readinto(buffers[fileno])

        # phase 2: decode tiles from the data that was read
        # (the final block may contain fewer than `tile_block_size` tiles)
        for idx in range(block_idx, min(block_idx + tile_block_size, slices.shape[0])):
            origin = slices[idx, 0]
            shape = slices[idx, 1]
            tile_ranges = ranges[idx]
            scheme_idx = scheme_indices[idx]
            out_cut = out_decoded[:shape_prods[idx]].reshape((shape[0], -1))
            data = r_n_d(
                idx,
                buffers,
                sig_dims,
                tile_ranges,
                out_cut,
                native_dtype,
                do_zero=need_clear,
                origin=origin,
                shape=shape,
                ds_shape=ds_shape,
                offsets=min_per_file,
            )
            tile_slice = Slice(
                origin=origin,
                shape=Shape(shape, sig_dims=sig_dims),
            )
            data = data.reshape(shape)
            self.preprocess(data, tile_slice, corrections)
            yield DataTile(
                data,
                tile_slice=tile_slice,
                scheme_idx=scheme_idx,
            )

    def get_tiles(
        self, tiling_scheme, fileset, read_ranges, roi, native_dtype,
        read_dtype, decoder, sync_offset, corrections,
    ):
        with fileset:
            yield from self._get_tiles_by_block(
                tiling_scheme=tiling_scheme,
                fileset=fileset,
                read_ranges=read_ranges,
                read_dtype=read_dtype,
                native_dtype=native_dtype,
                decoder=decoder,
                corrections=corrections,
                sync_offset=sync_offset,
            )
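

# A rough sketch of the two-phase "read block, then decode" strategy used by
# BufferedBackendImpl._read_block_dense above, without the LiberTEM helper
# classes: for each file, issue one contiguous read from the smallest to the
# largest offset touched by the block, then cut the individual tile ranges
# back out of that in-memory buffer. `read_ranges_for_block` is a hypothetical
# stand-in for the real read range structure, here just rows of
# (start, stop) byte offsets into a single file.
def read_block_dense_sketch(f, read_ranges_for_block):
    import numpy as np

    offsets = np.asarray(read_ranges_for_block)
    start = int(offsets[:, 0].min())
    stop = int(offsets[:, 1].max())

    # phase 1: one read covering all ranges of this block
    buf = np.empty(stop - start, dtype=np.uint8)
    f.seek(start)
    f.readinto(buf)

    # phase 2: slice the tiles back out of the buffer (views, no extra copy)
    return [buf[s - start:e - start] for (s, e) in offsets]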
class MMapBackendImpl(IOBackendImpl):
    def __init__(self, enable_readahead_hints=False):
        super().__init__()
        self._enable_readahead = enable_readahead_hints
        self._buffer_pool = BufferPool()

    def _get_tiles_straight(self, tiling_scheme, fileset, read_ranges, sync_offset=0):
        """
        Read straight from the file system cache, via memory mapping, without
        any decoding step.

        This method makes a few assumptions:
         - no corrections are needed
         - tiles don't span multiple files
         - the `LocalFile` has already cut away headers and footers
           (both per-file and per-frame) from the mmap

        Parameters
        ----------

        fileset : FileSet
            The fileset must correspond to the indices used in the `read_ranges`.
            Usually, that means it is limited to the files that are part of the
            current partition.

        read_ranges : Tuple[np.ndarray, np.ndarray, np.ndarray]
            As returned by `get_read_ranges`
        """
        ds_sig_shape = tuple(tiling_scheme.dataset_shape.sig)
        sig_dims = tiling_scheme.shape.sig.dims
        slices, ranges, scheme_indices = read_ranges

        for idx in range(slices.shape[0]):
            origin, shape = slices[idx]
            tile_ranges = ranges[idx]
            scheme_idx = scheme_indices[idx]

            # NOTE: for straight mmap, read_ranges must not contain tiles
            # that span multiple files. This is ensured in IOBackend.need_copy
            # (that is, we force copying if files are smaller than tiles)
            file_idx = tile_ranges[0][0]
            fh = fileset[file_idx]
            memmap = fh.mmap().reshape((fh.num_frames,) + ds_sig_shape)
            tile_slice = Slice(
                origin=origin,
                shape=Shape(shape, sig_dims=sig_dims),
            )
            # sync_offset is either zero or positive
            # in case of negative sync_offset, _get_tiles_w_copy is used
            data_slice = (
                slice(
                    origin[0] - fh.start_idx + sync_offset,
                    origin[0] - fh.start_idx + shape[0] + sync_offset
                ),
            ) + tuple(
                slice(o, o + s)
                for (o, s) in zip(origin[1:], shape[1:])
            )
            data = memmap[data_slice]
            yield DataTile(
                data,
                tile_slice=tile_slice,
                scheme_idx=scheme_idx,
            )

    def get_read_and_decode(self, decode):
        key = (decode, "mmap")
        if key in _r_n_d_cache:
            return _r_n_d_cache[key]
        r_n_d = _make_mmap_reader_and_decoder(decode=decode)
        _r_n_d_cache[key] = r_n_d
        return r_n_d

    def _get_tiles_w_copy(
        self, tiling_scheme, fileset, read_ranges, read_dtype, native_dtype,
        decoder=None, corrections=None,
    ):
        if decoder is None:
            decoder = DtypeConversionDecoder()
        decode = decoder.get_decode(
            native_dtype=np.dtype(native_dtype),
            read_dtype=np.dtype(read_dtype),
        )
        r_n_d = self._r_n_d = self.get_read_and_decode(decode)
        native_dtype = decoder.get_native_dtype(native_dtype, read_dtype)

        mmaps = List()
        for fh in fileset:
            mmaps.append(np.frombuffer(fh.raw_mmap(), dtype=np.uint8))

        sig_dims = tiling_scheme.shape.sig.dims
        ds_shape = np.array(tiling_scheme.dataset_shape)

        largest_slice = sorted([
            (np.prod(s_.shape), s_)
            for _, s_ in tiling_scheme.slices
        ], key=lambda x: x[0], reverse=True)[0][1]

        buf_shape = (tiling_scheme.depth,) + tuple(largest_slice.shape)
        need_clear = decoder.do_clear()

        with self._buffer_pool.empty(buf_shape, dtype=read_dtype) as out_decoded:
            out_decoded = out_decoded.reshape((-1,))
            slices = read_ranges[0]
            shape_prods = np.prod(slices[..., 1, :], axis=1)
            ranges = read_ranges[1]
            scheme_indices = read_ranges[2]
            for idx in range(slices.shape[0]):
                origin = slices[idx, 0]
                shape = slices[idx, 1]
                tile_slice = Slice(
                    origin=origin,
                    shape=Shape(shape, sig_dims=sig_dims),
                )
                tile_ranges = ranges[idx]
                scheme_idx = scheme_indices[idx]
                # if idx < slices.shape[0] - 1:
                #     self._prefetch_for_tile(fileset, ranges[idx + 1])
                #     pass
                out_cut = out_decoded[:shape_prods[idx]].reshape((shape[0], -1))
                data = r_n_d(
                    idx,
                    mmaps,
                    sig_dims,
                    tile_ranges,
                    out_cut,
                    native_dtype,
                    do_zero=need_clear,
                    origin=origin,
                    shape=shape,
                    ds_shape=ds_shape,
                )
                data = data.reshape(shape)
                self.preprocess(data, tile_slice, corrections)
                yield DataTile(
                    data,
                    tile_slice=tile_slice,
                    scheme_idx=scheme_idx,
                )

    def get_tiles(
        self, tiling_scheme, fileset, read_ranges, roi, native_dtype,
        read_dtype, decoder, sync_offset, corrections,
    ):
        # TODO: how would compression work?
        # TODO: sparse input data? COO format? fill rate? → own pipeline! → later!
        #       strategy: assume low (20%?) fill rate, read whole partition and apply ROI in-memory
        #       partitioning when opening the dataset, or by having one file per partition
        with fileset:
            if self._enable_readahead:
                self._set_readahead_hints(roi, fileset)
            if not self.need_copy(
                decoder=decoder,
                tiling_scheme=tiling_scheme,
                fileset=fileset,
                roi=roi,
                native_dtype=native_dtype,
                read_dtype=read_dtype,
                sync_offset=sync_offset,
                corrections=corrections,
            ):
                yield from self._get_tiles_straight(
                    tiling_scheme, fileset, read_ranges, sync_offset,
                )
            else:
                yield from self._get_tiles_w_copy(
                    tiling_scheme=tiling_scheme,
                    fileset=fileset,
                    read_ranges=read_ranges,
                    read_dtype=read_dtype,
                    native_dtype=native_dtype,
                    decoder=decoder,
                    corrections=corrections,
                )

    def _prefetch_for_tile(self, fileset, tile_ranges):
        prefr = _get_prefetch_ranges(len(fileset), tile_ranges)
        prefr = prefr[~np.all(prefr == 0, axis=1)]
        for mi, ma, fidx in prefr:
            f = fileset[fidx]
            os.posix_fadvise(
                f.fileno(),
                mi,
                ma - mi,
                os.POSIX_FADV_WILLNEED,
            )

    def _set_readahead_hints(self, roi, fileset):
        if not hasattr(os, 'posix_fadvise'):
            return
        if any(f.fileno() is None for f in fileset):
            return
        for f in fileset:
            os.posix_fadvise(
                f.fileno(),
                0, 0,
                os.POSIX_FADV_WILLNEED,
            )
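

# For reference, a minimal sketch of the readahead hinting used by
# MMapBackendImpl._set_readahead_hints above: `os.posix_fadvise` only exists
# on POSIX systems, so it has to be feature-checked before use. The path
# argument is a placeholder; offset=0, length=0 means "the whole file".
def advise_willneed(path):
    import os

    if not hasattr(os, "posix_fadvise"):
        return  # e.g. on Windows: silently skip the hint
    fd = os.open(path, os.O_RDONLY)
    try:
        os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_WILLNEED)
    finally:
        os.close(fd)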
class BufferedBackendImpl(IOBackendImpl):
    def __init__(self, max_buffer_size, direct_io=False):
        super().__init__()
        self._max_buffer_size = max_buffer_size
        self._direct_io = direct_io
        self._buffer_pool = BufferPool()

    @contextlib.contextmanager
    def open_files(self, fileset: FileSet):
        cls: Type[BufferedFile]
        if self._direct_io:
            cls = DirectBufferedFile
        else:
            cls = BufferedFile
        files = [
            cls(path=f.path, desc=f).open()
            for f in fileset
        ]
        try:
            yield files
        finally:
            # make sure files are closed even if the consumer raises:
            for f in files:
                f.close()

    def need_copy(
        self, decoder, roi, native_dtype, read_dtype,
        tiling_scheme=None, fileset=None, sync_offset=0, corrections=None,
    ):
        return True  # we always copy in this backend

    def get_read_and_decode(self, decode):
        key = (decode, "read")
        if key in _r_n_d_cache:
            return _r_n_d_cache[key]
        r_n_d = _make_buffered_reader_and_decoder(decode=decode)
        _r_n_d_cache[key] = r_n_d
        return r_n_d

    def get_max_io_size(self):
        return self._max_buffer_size

    def _get_tiles_by_block(
        self, tiling_scheme, open_files, read_ranges, read_dtype, native_dtype,
        decoder=None, corrections=None, sync_offset=0,
    ):
        if decoder is None:
            decoder = DtypeConversionDecoder()
        decode = decoder.get_decode(
            native_dtype=np.dtype(native_dtype),
            read_dtype=np.dtype(read_dtype),
        )
        r_n_d = self._r_n_d = self.get_read_and_decode(decode)
        native_dtype = decoder.get_native_dtype(native_dtype, read_dtype)

        sig_dims = tiling_scheme.shape.sig.dims
        ds_shape = np.array(tiling_scheme.dataset_shape)

        largest_slice = sorted((
            (prod(s_.shape), s_)
            for _, s_ in tiling_scheme.slices
        ), key=lambda x: x[0], reverse=True)[0][1]

        buf_shape = (tiling_scheme.depth,) + tuple(largest_slice.shape)
        need_clear = decoder.do_clear()

        slices = read_ranges[0]
        # use NumPy prod here, for the multidimensional array and the axis parameter
        shape_prods = np.prod(slices[..., 1, :], axis=1, dtype=np.int64)
        ranges = read_ranges[1]
        scheme_indices = read_ranges[2]

        tile_block_size = len(tiling_scheme)

        with self._buffer_pool.empty(buf_shape, dtype=read_dtype) as out_decoded:
            out_decoded = out_decoded.reshape((-1,))
            for block_idx in range(0, slices.shape[0], tile_block_size):
                block_ranges = ranges[block_idx:block_idx + tile_block_size]

                fill_factor, req_buf_size, min_per_file, max_per_file = block_get_min_fill_factor(
                    block_ranges
                )
                # TODO: if it makes sense, implement sparse variant
                # if req_buf_size > self._max_buffer_size or fill_factor < self._sparse_threshold:

                yield from self._read_block_dense(
                    block_idx, tile_block_size, min_per_file, max_per_file,
                    open_files, slices, ranges, scheme_indices, shape_prods,
                    out_decoded, r_n_d, sig_dims, ds_shape, need_clear,
                    native_dtype, corrections,
                )

    def _read_block_dense(
        self, block_idx, tile_block_size, min_per_file, max_per_file,
        open_files, slices, ranges, scheme_indices, shape_prods, out_decoded,
        r_n_d, sig_dims, ds_shape, need_clear, native_dtype, corrections,
    ):
        """
        Reads a block of tiles, starting at `block_idx`, having a size of
        `tile_block_size` read range entries.
        """
        # phase 1: read
        buffers = Dict()
        # this list manages the lifetime of the ManagedBuffer instances;
        # after `buf_ref` goes out of scope, the buffers are returned to
        # the buffer pool, so make sure that this matches with the usage
        # of the buffers!
        buf_ref = []
        for fileno in min_per_file.keys():
            fh = open_files[fileno]
            # add align_to to allow for the alignment cut below:
            align_to = fh.get_blocksize()
            read_size = max_per_file[fileno] - min_per_file[fileno] + align_to
            # ManagedBuffer gives us memory in 4k blocks, so the size is 4k-aligned
            mb = ManagedBuffer(self._buffer_pool, read_size, alignment=fh.get_blocksize())
            arr = np.frombuffer(mb.buf, dtype=np.uint8)
            buf_ref.append(mb)
            seek_pos = min_per_file[fileno]
            alignment = 0
            # seek_pos needs to be aligned to the block size, too:
            if seek_pos % align_to != 0:
                alignment = seek_pos % align_to
                seek_pos = align_to * (seek_pos // align_to)
            fh.seek(seek_pos)
            read_result = fh.readinto(arr)
            # the read may be truncated, if the buffer is larger than the
            # file; we truncate the buffer, too, to make sure we don't use
            # any uninitialized values. Also cut off the `alignment` bytes at
            # the beginning, which were only read to make O_DIRECT happy:
            buffers[fileno] = read_result[alignment:]

        # phase 2: decode tiles from the data that was read
        # (the final block may contain fewer than `tile_block_size` tiles)
        for idx in range(block_idx, min(block_idx + tile_block_size, slices.shape[0])):
            origin = slices[idx, 0]
            shape = slices[idx, 1]
            tile_ranges = ranges[idx]
            scheme_idx = scheme_indices[idx]
            out_cut = out_decoded[:shape_prods[idx]].reshape((shape[0], -1))
            data = r_n_d(
                idx,
                buffers,
                sig_dims,
                tile_ranges,
                out_cut,
                native_dtype,
                do_zero=need_clear,
                origin=origin,
                shape=shape,
                ds_shape=ds_shape,
                offsets=min_per_file,
            )
            tile_slice = Slice(
                origin=origin,
                shape=Shape(shape, sig_dims=sig_dims),
            )
            data = data.reshape(shape)
            self.preprocess(data, tile_slice, corrections)
            yield DataTile(
                data,
                tile_slice=tile_slice,
                scheme_idx=scheme_idx,
            )

    def get_tiles(
        self, tiling_scheme, fileset, read_ranges, roi, native_dtype,
        read_dtype, decoder, sync_offset, corrections,
    ):
        with self.open_files(fileset) as open_files:
            yield from self._get_tiles_by_block(
                tiling_scheme=tiling_scheme,
                open_files=open_files,
                read_ranges=read_ranges,
                read_dtype=read_dtype,
                native_dtype=native_dtype,
                decoder=decoder,
                corrections=corrections,
                sync_offset=sync_offset,
            )
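

# The alignment arithmetic from _read_block_dense above, in isolation: for
# O_DIRECT, both the file offset and the buffer size must be multiples of the
# device block size, so the seek position is rounded *down* to the nearest
# block boundary and the surplus bytes at the start are cut off after the
# read. This is a pure-arithmetic sketch, assuming a 4096-byte block size for
# illustration; the helper name is not part of the backend.
def aligned_read_params(seek_pos, read_size, align_to=4096):
    alignment = seek_pos % align_to       # surplus bytes to discard after reading
    aligned_pos = seek_pos - alignment    # == align_to * (seek_pos // align_to)
    # pad the size so the wanted range is still fully covered, then round up
    # to the next block boundary:
    aligned_size = read_size + alignment
    aligned_size += (-aligned_size) % align_to
    return aligned_pos, aligned_size, alignment

# e.g. aligned_read_params(5000, 10000) -> (4096, 12288, 904):
# seek to 4096, read 12288 bytes, then drop the first 904 bytes.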