def open_file(self):
    f = np.memmap(self._path, dtype=self.dtype, mode='r',
                  shape=self._scan_size + self._detector_size_raw)
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    return f[ds_slice.get()]  # crop off the two extra rows

def get_partitions(self):
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    for pslice in ds_slice.subslices(self.partition_shape):
        yield MemoryPartition(
            tileshape=self.tileshape,
            dataset=self,
            dtype=self.dtype,
            partition_slice=pslice,
        )

def test_subslices_non_even_division_2():
    top_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=(3, 1, 1, 1),
    )
    assert list(top_slice.subslices(shape=(2, 1, 1, 1))) == [
        Slice(origin=(0, 0, 0, 0), shape=(2, 1, 1, 1)),
        Slice(origin=(2, 0, 0, 0), shape=(1, 1, 1, 1)),
    ]

def test_shift_2():
    s1 = Slice(
        origin=(2, 2, 0, 0),
        shape=(1, 1, 2, 2),
    )
    s2 = Slice(origin=(1, 1, 0, 0), shape=(1, 1, 4, 4))
    shifted = s1.shift(s2)
    assert shifted.origin == (1, 1, 0, 0)

def test_slice_intersect_0():
    s1 = Slice(
        origin=(0, 0, 0, 0),
        shape=(2, 2, 2, 2),
    )
    s2 = Slice(
        origin=(0, 0, 0, 0),
        shape=(1, 1, 1, 1),
    )
    assert s1.intersection_with(s2) == s2

def test_slice_intersect_3():
    s1 = Slice(
        origin=(1, 1, 1, 1),
        shape=(2, 2, 2, 2),
    )
    s2 = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    res = s1.intersection_with(s2)
    assert res == s1

def test_get_slice_2():
    slice_ = Slice(
        origin=(1, 1, 1, 1),
        shape=(1, 1, 2, 2),
    )
    data = np.arange(4 * 4 * 4 * 4).reshape(4, 4, 4, 4)
    assert slice_.get(data).shape == slice_.shape
    assert np.all(slice_.get(data) == np.array([[[
        [85, 86],
        [89, 90],
    ]]]))

def test_get_slice_1():
    slice_ = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    assert slice_.get() == (
        slice(0, 4),
        slice(0, 4),
        slice(0, 4),
        slice(0, 4),
    )

def test_subslice_from_offset_length_1():
    s1 = Slice(
        origin=(1, 1, 1, 1),
        shape=(2, 2, 2, 2),
    )
    sub1 = s1.subslice_from_offset(offset=0, length=2)
    assert sub1.origin == (1, 1, 1, 1)
    assert sub1.shape == (1, 2, 2, 2)

    sub2 = s1.subslice_from_offset(offset=0, length=4)
    assert sub2.origin == (1, 1, 1, 1)
    assert sub2.shape == (2, 2, 2, 2)

def test_subslice_from_offset_length_3():
    s1 = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 2, 2),
    )
    # we can also create a subslice that is smaller than one row:
    sub3 = s1.subslice_from_offset(offset=0, length=1)
    assert sub3.origin == (0, 0, 0, 0)
    assert sub3.shape == (1, 1, 2, 2)

    sub3 = s1.subslice_from_offset(offset=1, length=1)
    assert sub3.origin == (0, 1, 0, 0)
    assert sub3.shape == (1, 1, 2, 2)

def get_partitions(self):
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    partition_shape = Slice.partition_shape(
        datashape=self.shape,
        framesize=self.shape[2] * self.shape[3],
        dtype=self.dtype,
        target_size=256 * 1024 * 1024,
    )
    for pslice in ds_slice.subslices(partition_shape):
        yield BloPartition(
            tileshape=self._tileshape,
            dataset=self,
            dtype=self.dtype,
            partition_slice=pslice,
        )

def test_slice_intersect_2():
    s1 = Slice(
        origin=(1, 1, 1, 1),
        shape=(2, 2, 2, 2),
    )
    s2 = Slice(
        origin=(0, 0, 0, 0),
        shape=(1, 1, 1, 1),
    )
    res = s1.intersection_with(s2)
    assert res == Slice(
        origin=(1, 1, 1, 1),
        shape=(0, 0, 0, 0),
    )
    assert res.is_null()

def test_for_datatile_1(masks):
    tile = DataTile(
        tile_slice=Slice(origin=(0, 0, 0, 0), shape=(1, 1, 1, 1)),
        data=np.ones((1, 1, 1, 1)),
    )
    slice_ = masks.get_masks_for_slice(tile.tile_slice)
    assert slice_.shape == (1, 4)

async def pick_frame(self, dataset_uuid, x, y):
    ds = self.data.get_dataset(dataset_uuid)
    x = int(x)
    y = int(y)
    slice_ = Slice(origin=(y, x, 0, 0),
                   shape=(1, 1, ds.shape[2], ds.shape[3]))
    job = PickFrameJob(dataset=ds, slice_=slice_)
    executor = self.data.get_executor()
    log.info("picking %d/%d from %s", x, y, dataset_uuid)
    futures = []
    for task in job.get_tasks():
        submit_kwargs = {}
        futures.append(executor.client.submit(task, **submit_kwargs))
    full_result = np.zeros(shape=ds.shape[2:])
    async for future, result in dd.as_completed(futures, with_results=True):
        for tile in result:
            tile.copy_to_result(full_result)
    log.info("picking done, encoding image (dtype=%s)", full_result.dtype)
    image = await run_blocking(
        _encode_image,
        full_result,
        colormap=cm.gist_earth,
        save_kwargs={'format': 'png'},
    )
    log.info("image encoded, sending response")
    return image.read()

def _get_slice(self, slice: Slice):
    real_slice = slice.get()
    result = self._data[real_slice]
    # Defend against #1026 (internal bugs), allow deactivating in
    # optimized builds for performance
    assert result.shape == tuple(slice.shape) + self.extra_shape
    return result

def get_tiles(self):
    """
    yield one tile per underlying data block
    """
    s = self._get_sector()
    scan = self._scan_size
    try:
        all_blocks = s.get_blocks()
        blocks_to_read = (BLOCKS_PER_SECTOR_PER_FRAME * scan[0] * scan[1])
        buf = np.zeros((1, 1) + BLOCK_SHAPE, dtype="uint16")
        for block_idx, block in enumerate(
                itertools.islice(all_blocks, blocks_to_read)):
            frame_idx = block_idx // BLOCKS_PER_SECTOR_PER_FRAME
            scan_pos_y = frame_idx // scan[1]
            scan_pos_x = frame_idx % scan[1]
            h = block.header
            # TODO: move tile_slice stuff to datablock?
            sector_offset = SECTOR_SIZE[1] * block.sector.idx
            tile_slice = Slice(
                origin=(scan_pos_y, scan_pos_x,
                        h['pixel_y_start'],
                        sector_offset + h['pixel_x_start']),
                shape=(1, 1) + BLOCK_SHAPE,
            )
            log.debug("tile_slice=%r", tile_slice)
            block.readinto(buf)
            yield DataTile(data=buf, tile_slice=tile_slice)
    finally:
        s.close()

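# A small worked example of the block -> scan-position arithmetic above
# (a sketch; the constant and the scan size are assumed values, purely
# for illustration):
def _block_to_scan_pos_example():
    blocks_per_sector_per_frame = 32  # assumed stand-in for BLOCKS_PER_SECTOR_PER_FRAME
    scan = (16, 16)                   # assumed scan size
    block_idx = 70
    frame_idx = block_idx // blocks_per_sector_per_frame  # -> 2
    scan_pos_y = frame_idx // scan[1]                     # -> 0
    scan_pos_x = frame_idx % scan[1]                      # -> 2
    assert (frame_idx, scan_pos_y, scan_pos_x) == (2, 0, 2)
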
def _slice_for_partition(self, partition):
    """
    Get a Slice into self._data for `partition`, taking the current ROI
    into account.

    Because _data is "compressed" if a ROI is set, we can't directly
    index and must calculate a new slice from the ROI.
    """
    if self._roi is None:
        return partition.slice
    else:
        roi = self._roi.reshape((-1,))
        slice_ = partition.slice
        s_o = slice_.origin[0]
        s_s = slice_.shape[0]
        # We need to find how many 1s there are for all previous
        # partitions, to know the origin; then we count how many 1s
        # there are in our partition to find our shape.
        origin = np.count_nonzero(roi[:s_o])
        shape = np.count_nonzero(roi[s_o:s_o + s_s])
        sig_dims = slice_.shape.sig.dims
        slice_ = Slice(
            origin=(origin,) + slice_.origin[-sig_dims:],
            shape=Shape((shape,) + tuple(slice_.shape.sig),
                        sig_dims=sig_dims),
        )
        return slice_

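# Worked example for the ROI arithmetic above (a sketch, not part of the
# original code): for a boolean ROI over 8 flattened nav positions and a
# partition covering positions 4..8, the compressed origin is the number
# of selected positions before the partition, and the compressed shape
# is the number of selected positions inside it.
def _roi_compression_example():
    import numpy as np
    roi = np.array([1, 0, 1, 1, 0, 1, 1, 0], dtype=bool)
    s_o, s_s = 4, 4  # partition origin/shape along the flat nav axis
    origin = np.count_nonzero(roi[:s_o])          # 3 selected before the partition
    shape = np.count_nonzero(roi[s_o:s_o + s_s])  # 2 selected inside it
    assert (origin, shape) == (3, 2)
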
def test_for_datatile_with_scan_origin(masks):
    tile = DataTile(
        tile_slice=Slice(origin=(10, 10, 0, 0), shape=(2, 2, 10, 10)),
        data=np.ones((2, 2, 10, 10)),
    )
    slice_ = masks.get_masks_for_slice(tile.tile_slice)
    assert slice_.shape == (100, 4)

def test_for_datatile_with_frame_origin(masks):
    tile = DataTile(
        tile_slice=Slice(origin=(10, 10, 10, 10), shape=(2, 2, 1, 5)),
        data=np.ones((2, 2, 1, 5)),
    )
    slice_ = masks.get_masks_for_slice(tile.tile_slice)
    print(slice_)
    assert_array_almost_equal(
        slice_,
        np.array([
            1, 0, 1, 10,
            1, 0, 1, 11,
            1, 0, 1, 12,
            1, 0, 1, 13,
            1, 0, 1, 14,
        ]).reshape((5, 4))
    )

def get_partitions(self):
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    partition_shape = Slice.partition_shape(
        datashape=self.shape,
        framesize=self._detector_size[0] * self._detector_size[1],
        dtype=self.dtype,
        target_size=256 * 1024 * 1024,
        min_num_partitions=self._min_num_partitions,
    )
    for pslice in ds_slice.subslices(partition_shape):
        # TODO: where should the tileshape be set? let the user choose for now
        yield RawFilePartition(
            tileshape=self._tileshape,
            dataset=self,
            dtype=self.dtype,
            partition_slice=pslice,
        )

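# Back-of-the-envelope check for the 256 MiB target size above (an
# illustration with assumed numbers, not taken from the code): with
# float32 frames of 256x256 pixels, one frame takes 256 KiB, so one
# partition holds on the order of 1024 frames.
def _partition_sizing_example():
    bytes_per_frame = 256 * 256 * 4  # assumed float32 frame of 256x256
    target_size = 256 * 1024 * 1024
    assert target_size // bytes_per_frame == 1024
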
def test_write_handle(tmpdir_factory):
    """
    test the common "happy path"
    """
    datadir = tmpdir_factory.mktemp('write_handle_tests')
    full_path = os.path.join(datadir, "f1")
    part_slice = Slice(
        shape=Shape((32, 64, 64), sig_dims=2),
        origin=(16, 0, 0),
    )
    tile_slice = Slice(
        shape=Shape((3, 64, 64), sig_dims=2),
        origin=(19, 0, 0),
    )
    tile_data = np.random.randn(3, 64, 64).astype("float32")
    tile = DataTile(
        tile_data,
        tile_slice=tile_slice,
        scheme_idx=0,
    )

    wh = WriteHandle(full_path, datadir, part_slice, dtype='float32')

    tmp_file_name = ""
    with wh:
        wh.write_tile(tile)
        tmp_file_name = wh._tmp_file.name
        assert os.path.exists(tmp_file_name)

    # check some internals:
    assert wh._dest is None
    assert wh._tmp_file is None

    # the temporary file should no longer exist in case of success
    assert not os.path.exists(tmp_file_name)

    # ... but our dest fname should:
    assert os.path.exists(full_path)
    assert os.path.isfile(full_path)

    # check if data is written correctly:
    read_data = np.fromfile(full_path, dtype="float32").reshape(part_slice.shape)
    assert np.allclose(read_data[3:6, ...], tile_data)

def get_partitions(self):
    """
    we keep it simple: one MIB file == one partition
    """
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=self.shape)
    for f in self._files_sorted():
        idx = f.fields['sequence_first_image'] - 1
        length = f.fields['num_images']
        pslice = ds_slice.subslice_from_offset(offset=idx, length=length)
        yield MIBPartition(
            tileshape=self._tileshape,
            dataset=self,
            partfile=f,
            dtype=self.dtype,
            partition_slice=pslice,
        )

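# Illustration of the header -> partition mapping above (a sketch, using
# the subslice_from_offset semantics from the tests; the header values
# are made up): a file with sequence_first_image == 5 and num_images == 4
# covers flat frame offsets 4..8, i.e. the second row of a 4x4 scan.
def _mib_partition_mapping_example():
    ds_slice = Slice(origin=(0, 0, 0, 0), shape=(4, 4, 2, 2))
    idx = 5 - 1   # 'sequence_first_image' is 1-based
    length = 4    # 'num_images'
    pslice = ds_slice.subslice_from_offset(offset=idx, length=length)
    assert pslice.origin == (1, 0, 0, 0)
    assert pslice.shape == (1, 4, 2, 2)
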
def get_partitions(self):
    with self.get_h5ds() as h5ds:
        ds_slice = Slice(origin=(0, 0, 0, 0), shape=h5ds.shape)
        partition_shape = Slice.partition_shape(
            datashape=h5ds.shape,
            framesize=h5ds[0][0].size,
            dtype=h5ds.dtype,
            target_size=self.target_size,
            min_num_partitions=self.min_num_partitions,
        )
        dtype = h5ds.dtype
        for pslice in ds_slice.subslices(partition_shape):
            # TODO: where should the tileshape be set? let the user choose for now
            yield H5Partition(
                tileshape=self.tileshape,
                dataset=self,
                dtype=dtype,
                partition_slice=pslice,
            )

def get_partitions(self):
    for partition in self._index['partitions']:
        yield BinaryHDFSPartition(
            path=os.path.join(self.dirname, partition['filename']),
            tileshape=self.tileshape,
            dataset=self,
            dtype=self._index['dtype'],
            partition_slice=Slice(origin=partition['origin'],
                                  shape=partition['shape']),
        )

def test_subslices_simple():
    top_slice = Slice(
        origin=(0, 0, 0, 0),
        shape=(4, 4, 4, 4),
    )
    assert list(top_slice.subslices(shape=(2, 2, 4, 4))) == [
        Slice(origin=(0, 0, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(0, 2, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(2, 0, 0, 0), shape=(2, 2, 4, 4)),
        Slice(origin=(2, 2, 0, 0), shape=(2, 2, 4, 4)),
    ]

def test_write_handle_exception(tmpdir_factory):
    datadir = tmpdir_factory.mktemp('write_handle_tests')
    full_path = os.path.join(datadir, "f1")
    part_slice = Slice(
        shape=Shape((32, 64, 64), sig_dims=2),
        origin=(16, 0, 0),
    )
    tile_slice = Slice(
        shape=Shape((3, 64, 64), sig_dims=2),
        origin=(19, 0, 0),
    )
    tile_data = np.random.randn(3, 64, 64).astype("float32")
    tile = DataTile(
        tile_data,
        tile_slice=tile_slice,
        scheme_idx=0,
    )

    wh = WriteHandle(full_path, datadir, part_slice, dtype='float32')

    tmp_file_name = ""
    with pytest.raises(Exception):
        with wh:
            wh.write_tile(tile)
            tmp_file_name = wh._tmp_file.name
            assert os.path.exists(tmp_file_name)
            raise Exception("nope")

    # check some internals:
    assert wh._dest is None
    assert wh._tmp_file is None

    # the temporary file should no longer exist in case of exception
    assert not os.path.exists(tmp_file_name)
    # and neither should the full destination path
    assert not os.path.exists(full_path)

def make_index(self, data, dtype, min_num_partitions=16,
               target_size=512 * 1024 * 1024):
    """
    create the json-serializable index structure. decides about the
    concrete partitioning, which will later be used to split the input data
    """
    partition_shape = Slice.partition_shape(
        datashape=data.shape,
        framesize=data[0][0].size,
        dtype=dtype,
        min_num_partitions=min_num_partitions,
        target_size=target_size,
    )
    partitions = self.make_partitions(
        data=data,
        partition_shape=partition_shape,
    )
    fname_fmt = "partition-%(idx)08d.raw"
    index = {
        "dtype": str(dtype),
        "mode": "rect",
        "shape": data.shape,
        "partitions": [
            {
                "origin": p['origin'],
                "shape": p['shape'],
                "filename": fname_fmt % {"idx": i},
            }
            for (i, p) in enumerate(partitions)
        ],
    }
    return index

def test_copy_to_result():
    # result tile: for three masks, insert all ones into the given position:
    res_tile = ResultTile(
        data=np.ones((
            4,  # xdim*ydim, flattened
            3,  # num masks
        )),
        tile_slice=Slice(origin=(2, 2, 0, 0), shape=(1, 4, 10, 10)),
    )
    result = np.zeros((
        3,   # num masks
        10,  # ydim
        10,  # xdim
    ))

    res_tile.copy_to_result(result)
    res_tile.copy_to_result(result)
    print(result)

    dest_slice = res_tile._get_dest_slice()
    assert dest_slice[0] == Ellipsis
    assert dest_slice[1] == slice(2, 3, None)
    assert dest_slice[2] == slice(2, 6, None)
    assert len(dest_slice) == 3

    # let's see if we can select the right slice:
    assert result[..., 2:3, 2:6].shape == (3, 1, 4)

    # the region selected above should be 2, since copy_to_result was
    # called twice:
    assert np.all(result[..., 2:3, 2:6] == 2)

    # everything else should be 0:
    assert np.all(result[..., 2:3, :2] == 0)
    assert np.all(result[..., 2:3, 6:] == 0)
    assert np.all(result[..., :2, :] == 0)
    assert np.all(result[..., 3:, :] == 0)

def encode_roundtrip_quad(encode, bits_per_pixel, input_data=None,
                          dataset_shape=None, tileshape=None,
                          start_at_frame=2, stop_before_frame=6):
    if dataset_shape is None:
        # make some read ranges:
        dataset_shape = (6, 512, 512)
    dataset_shape = Shape(dataset_shape, sig_dims=2)
    if tileshape is None:
        tileshape = (2, 128, 512)
    tiling_scheme = TilingScheme.make_for_shape(
        dataset_shape=dataset_shape,
        tileshape=Shape(tileshape, sig_dims=2),
    )
    sync_offset = 0
    roi = None
    frame_header_bytes = 768
    image_size_bytes = dataset_shape.sig.size * bits_per_pixel // 8
    if bits_per_pixel in (1, 8):
        native_dtype = np.uint8
    elif bits_per_pixel == 16:
        native_dtype = np.uint16
    else:
        # fail early instead of hitting a NameError below:
        raise ValueError("unsupported bits_per_pixel: %d" % bits_per_pixel)
    fields: HeaderDict = {
        'header_size_bytes': frame_header_bytes,
        'dtype': native_dtype,
        'mib_dtype': 'R64',
        'mib_kind': 'r',
        # remove padding from `bits_per_pixel`
        'bits_per_pixel': {1: 1, 8: 6, 16: 12}[bits_per_pixel],
        'image_size': (512, 512),
        'image_size_bytes': image_size_bytes,
        'sequence_first_image': 1,
        'filesize': dataset_shape.nav.size * (image_size_bytes + frame_header_bytes),
        'num_images': dataset_shape.nav.size,
        'num_chips': 4,
        'sensor_layout': (2, 2),
    }
    file = MIBFile(
        path="",
        start_idx=0,
        end_idx=dataset_shape.nav.size,
        native_dtype=native_dtype,
        sig_shape=dataset_shape.sig,
        frame_header=frame_header_bytes,
        file_header=0,
        header=fields,
    )
    fileset = MIBFileSet(files=[file], header=fields,
                         frame_header_bytes=frame_header_bytes)
    backend = MMapBackendImplInMem()
    max_value = (1 << bits_per_pixel) - 1
    if input_data is None:
        data_full = np.random.randint(
            0, max_value + 1, tuple(dataset_shape.flatten_nav())
        )
        # make sure min/max values are indeed hit:
        data_full.reshape((-1,))[0] = max_value
        data_full.reshape((-1,))[-1] = 0
        assert np.max(data_full) == max_value
        assert np.min(data_full) == 0
    else:
        data_full = input_data.reshape(dataset_shape.flatten_nav())
    data = data_full[start_at_frame:stop_before_frame]

    # we need headers in-between: in contrast to the frame-by-frame
    # decoding, the decoder expects contiguous input data, so we can't
    # slice the headers away beforehand:
    encoded_data = encode_quad(encode, data_full, bits_per_pixel, with_headers=True)
    decoded = np.zeros_like(data)

    # that's the "interface" we made up for the in-mem mmap file above:
    file.data = encoded_data

    # wrapping the numba decoder function:
    decoder = MIBDecoder(header=fields)
    outer_slice = Slice(
        origin=(start_at_frame, 0, 0),
        shape=dataset_shape.flatten_nav(),
    )
    read_ranges = fileset.get_read_ranges(
        start_at_frame=start_at_frame,
        stop_before_frame=stop_before_frame,
        dtype=native_dtype,
        tiling_scheme=tiling_scheme,
        sync_offset=sync_offset,
        roi=roi,
    )
    for tile in backend.get_tiles(
        tiling_scheme=tiling_scheme,
        fileset=fileset,
        read_ranges=read_ranges,
        roi=roi,
        native_dtype=np.uint8,
        read_dtype=np.float32,
        decoder=decoder,
        sync_offset=0,
        corrections=None,
    ):
        slice_shifted = tile.tile_slice.shift(outer_slice)
        decoded[slice_shifted.get()] = tile.reshape(tile.tile_slice.shape)
    assert_allclose(data, decoded)
    return data, decoded

def disjoint(sl: Slice, slices: Iterable[Slice]):
    return all(sl.intersection_with(s2).is_null() for s2 in slices)
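
# Example usage of `disjoint` (a sketch, reusing the 4D Slice conventions
# from the tests above): s1 only covers nav position (0, 0), the other
# slices don't, so every pairwise intersection is null.
def _disjoint_example():
    s1 = Slice(origin=(0, 0, 0, 0), shape=(1, 1, 4, 4))
    others = [
        Slice(origin=(0, 1, 0, 0), shape=(1, 1, 4, 4)),
        Slice(origin=(1, 0, 0, 0), shape=(1, 1, 4, 4)),
    ]
    assert disjoint(s1, others)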