def chunked_emd(tmpdir_factory):
    """Pytest fixture: create a chunked EMD (HDF5) test file and yield a dataset.

    Writes a 4D float32 dataset of ones (chunked as 32x32x128x128) in the
    EMD v0.2 layout, including the four per-axis dimension datasets with
    their ``name``/``units`` attributes, then loads it through an inline
    (single-process) context.

    Parameters
    ----------
    tmpdir_factory
        pytest's session-scoped temporary-directory factory fixture.

    Yields
    ------
    The dataset loaded via ``Context.load("auto", ...)`` from the file.
    """
    lt_ctx = Context(executor=InlineJobExecutor())
    datadir = tmpdir_factory.mktemp('hdf5_chunked_data')
    filename = os.path.join(datadir, 'chunked.emd')
    chunks = (32, 32, 128, 128)
    with h5py.File(filename, mode="w") as f:
        f.attrs.create('version_major', 0)
        f.attrs.create('version_minor', 2)
        f.create_group('experimental/science_data')
        group = f['experimental/science_data']
        group.attrs.create('emd_group_type', 1)
        data = np.ones((256, 256, 128, 128), dtype=np.float32)
        group.create_dataset(name='data', data=data, chunks=chunks)
        # One dimension dataset per axis of `data`, each tagged with byte-string
        # 'name' and 'units' attributes (dim1/units1 ... dim4/units4), exactly
        # as the EMD layout expects:
        for axis, size in enumerate(data.shape, start=1):
            dim_name = 'dim%d' % axis
            group.create_dataset(name=dim_name, data=range(size))
            group[dim_name].attrs.create('name', dim_name.encode('ascii'))
            group[dim_name].attrs.create('units', ('units%d' % axis).encode('ascii'))
        # NOTE: no explicit f.close() here -- the `with` block already closes
        # the file on exit (the original called f.close() redundantly).
    yield lt_ctx.load("auto", path=filename, ds_path="/experimental/science_data/data")
def test_dask_array_2(dask_executor):
    """Smoke-test the dataset -> Dask array conversion and a reduction.

    Loads a small in-memory dataset, wraps it as a Dask array, and sums
    over the two signal axes through the executor's own
    dask.distributed client, blocking until the result is computed.
    """
    # NOTE: keep in sync with the example in docs/source/api.rst!
    # Construct a Dask array from the dataset
    # The second return value contains information
    # on workers that hold parts of a dataset in local
    # storage to ensure optimal data locality
    ctx = Context(executor=dask_executor)
    dataset = ctx.load("memory", datashape=(16, 16, 16), sig_dims=2)
    dask_array, workers = make_dask_array(dataset)
    # Use the Dask.distributed client of LiberTEM, since it may not be
    # the default client:
    ctx.executor.client.compute(dask_array.sum(axis=(-1, -2))).result()
class DataSocketSimulator:
    """Replays a MIB dataset over a connection as MPX-framed chunks.

    Opens the dataset referenced by an HDR file via an inline (single
    process) context and yields the raw acquisition header followed by
    individual frames, each preceded by an MPX header — either for a
    single scan, or cycling continuously over one or more ROIs.
    """

    def __init__(self, path: str, continuous=False, rois=None, max_runs=-1):
        """
        Parameters
        ----------
        path
            Path to the HDR file
        continuous
            If set to True, will continuously output data
        rois: List[np.ndarray]
            If a list of ROIs is given, in continuous mode, cycle through
            these ROIs from the source data
        max_runs: int
            Maximum number of continuous runs
        """
        if rois is None:
            rois = []
        if not path.lower().endswith(".hdr"):
            raise ValueError("please pass the path to the HDR file!")
        self._path = path
        self._continuous = continuous
        self._rois = rois
        self._ctx = Context(executor=InlineJobExecutor())
        self._ds = None
        self._max_runs = max_runs
        # cache of mmap objects, keyed by OS-level file descriptor;
        # populated lazily in _read_frame_w_header()
        self._mmaps = {}

    def open(self):
        """Load the MIB dataset and pre-compute read ranges (warmup)."""
        ds = self._ctx.load("mib", path=self._path)
        print("dataset shape: %s" % (ds.shape, ))
        self._ds = ds
        self._warmup()

    def get_chunks(self):
        """
        generator of `bytes` for the given configuration
        """
        # first, send acquisition header:
        with open(self._path, 'rb') as f:
            # FIXME: possibly change header in continuous mode?
            hdr = f.read()
        yield get_mpx_header(len(hdr))
        yield hdr
        if self._continuous:
            print("yielding from continuous")
            yield from self._get_continuous()
        else:
            print("yielding from single scan")
            # full-frame ROI: every nav position selected
            roi = np.ones(self._ds.shape.nav, dtype=bool)
            t = tqdm(total=np.count_nonzero(roi))
            try:
                for item in self._get_single_scan(roi):
                    yield item
                    t.update(1)
            finally:
                t.close()

    def _read_frame_w_header(self, fh, frame_idx, full_frame_size):
        """Return one raw frame (header + payload) as a mutable bytearray.

        Parameters
        ----------
        fh : LocalFile
        frame_idx : int
            File-relative frame index
        full_frame_size : int
            Size of header plus frame in bytes
        """
        if fh._file is None:
            fh.open()
        f = fh._file
        fileno = f.fileno()
        if fileno not in self._mmaps:
            # map the whole file read-only once per descriptor and reuse it
            self._mmaps[fileno] = raw_mmap = mmap.mmap(
                fileno=f.fileno(),
                length=0,
                offset=0,
                access=mmap.ACCESS_READ,
            )
        else:
            raw_mmap = self._mmaps[fileno]
        return bytearray(raw_mmap[full_frame_size * frame_idx:full_frame_size * (frame_idx + 1)])

    def _warmup(self):
        """Compute read ranges once up-front so later scans start fast."""
        fileset = self._ds._get_fileset()
        ds_shape = self._ds.shape
        tiling_scheme = TilingScheme.make_for_shape(
            tileshape=Shape((1, ) + tuple(ds_shape.sig), sig_dims=ds_shape.sig.dims),
            dataset_shape=ds_shape,
        )
        slices, ranges, scheme_indices = fileset.get_read_ranges(
            start_at_frame=0,
            stop_before_frame=int(np.prod(self._ds.shape.nav)),
            dtype=np.float32,  # FIXME: don't really care...
            tiling_scheme=tiling_scheme,
            roi=None,
        )

    def _get_single_scan(self, roi):
        """Yield MPX header + frame bytes for every selected frame in `roi`."""
        fileset = self._ds._get_fileset()
        ds_shape = self._ds.shape
        tiling_scheme = TilingScheme.make_for_shape(
            tileshape=Shape((1, ) + tuple(ds_shape.sig), sig_dims=ds_shape.sig.dims),
            dataset_shape=ds_shape,
        )
        slices, ranges, scheme_indices = fileset.get_read_ranges(
            start_at_frame=0,
            stop_before_frame=int(np.prod(self._ds.shape.nav)),
            dtype=np.float32,  # FIXME: don't really care...
            tiling_scheme=tiling_scheme,
            roi=roi,
        )
        first_file = self._ds._files_sorted[0]
        header_size = first_file.fields['header_size_bytes']
        full_frame_size = header_size + first_file.fields['image_size_bytes']
        mpx_header = get_mpx_header(full_frame_size)
        for idx in range(slices.shape[0]):
            origin = slices[idx, 0]
            # shape = slices[idx, 1]
            # origin, shape = slices[idx]
            tile_ranges = ranges[idx][0]
            file_idx = tile_ranges[0]
            fh = fileset[file_idx]
            # convert the dataset-global frame index to a file-local one:
            global_idx = origin[0]
            local_idx = global_idx - fh.start_idx
            frame_w_header = self._read_frame_w_header(fh, local_idx, full_frame_size)
            yield mpx_header
            yield frame_w_header

    def _get_continuous(self):
        """Cycle over the configured ROIs (or a full-frame ROI) forever.

        Raises RuntimeError once `max_runs` scans have been emitted (when
        `max_runs` is not -1), which tears down the connection.
        """
        if self._rois:
            rois = self._rois
        else:
            rois = [np.ones(self._ds.shape.nav, dtype=bool)]
        i = 0
        for roi in itertools.cycle(rois):
            t0 = time.time()
            yield from self._get_single_scan(roi)
            t1 = time.time()
            print("cycle %d took %.05fs" % (i, t1 - t0))
            i += 1
            if self._max_runs != -1 and i >= self._max_runs:
                raise RuntimeError("max_runs exceeded")

    def handle_conn(self, conn):
        """Stream every chunk to `conn`, then close it.

        BUGFIX: the close is now in a `finally` block — previously the
        socket leaked whenever sendall() (or chunk generation) raised.
        """
        try:
            for chunk in self.get_chunks():
                conn.sendall(chunk)
        finally:
            conn.close()