Code example #1
import os

import h5py
import numpy as np
import pytest

from libertem.api import Context
from libertem.executor.inline import InlineJobExecutor


# yield fixture that writes a chunked EMD (HDF5) test file and loads it;
# the @pytest.fixture decorator is assumed here, it is not part of the excerpt
@pytest.fixture
def chunked_emd(tmpdir_factory):
    lt_ctx = Context(executor=InlineJobExecutor())
    datadir = tmpdir_factory.mktemp('hdf5_chunked_data')
    filename = os.path.join(datadir, 'chunked.emd')

    chunks = (32, 32, 128, 128)

    with h5py.File(filename, mode="w") as f:
        f.attrs.create('version_major', 0)
        f.attrs.create('version_minor', 2)

        f.create_group('experimental/science_data')
        group = f['experimental/science_data']
        group.attrs.create('emd_group_type', 1)

        data = np.ones((256, 256, 128, 128), dtype=np.float32)

        group.create_dataset(name='data', data=data, chunks=chunks)
        group.create_dataset(name='dim1', data=range(256))
        group['dim1'].attrs.create('name', b'dim1')
        group['dim1'].attrs.create('units', b'units1')
        group.create_dataset(name='dim2', data=range(256))
        group['dim2'].attrs.create('name', b'dim2')
        group['dim2'].attrs.create('units', b'units2')
        group.create_dataset(name='dim3', data=range(128))
        group['dim3'].attrs.create('name', b'dim3')
        group['dim3'].attrs.create('units', b'units3')
        group.create_dataset(name='dim4', data=range(128))
        group['dim4'].attrs.create('name', b'dim4')
        group['dim4'].attrs.create('units', b'units4')

    yield lt_ctx.load("auto",
                      path=filename,
                      ds_path="/experimental/science_data/data")
Code example #2
from libertem.api import Context
from libertem.contrib.daskadapter import make_dask_array


# dask_executor is a pytest fixture that provides a Dask-based executor
def test_dask_array_2(dask_executor):
    # NOTE: keep in sync with the example in docs/source/api.rst!
    # Construct a Dask array from the dataset
    # The second return value contains information
    # on workers that hold parts of a dataset in local
    # storage to ensure optimal data locality
    ctx = Context(executor=dask_executor)
    dataset = ctx.load("memory", datashape=(16, 16, 16), sig_dims=2)
    dask_array, workers = make_dask_array(dataset)

    # Use the Dask.distributed client of LiberTEM, since it may not be
    # the default client:
    ctx.executor.client.compute(dask_array.sum(axis=(-1, -2))).result()
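
The comments above note that the second return value of make_dask_array() describes which workers hold parts of the dataset, but the test itself never uses it. As a hedged sketch (not part of the test), that mapping could be forwarded to the scheduler through the workers= argument of dask.distributed's Client.compute:

# sketch: pass the locality information along when submitting the
# computation, so chunks are preferably computed where the data lives
result = ctx.executor.client.compute(
    dask_array.sum(axis=(-1, -2)),
    workers=workers,
).result()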
Code example #3
import itertools
import mmap
import time

import numpy as np
from tqdm import tqdm

from libertem.api import Context
from libertem.common import Shape
from libertem.executor.inline import InlineJobExecutor
from libertem.io.dataset.base import TilingScheme

# get_mpx_header() is a small helper defined elsewhere in this module; it is
# assumed to build the MPX framing header for a payload of the given length.


class DataSocketSimulator:
    def __init__(self, path: str, continuous=False, rois=None, max_runs=-1):
        """
        Parameters
        ----------

        path
            Path to the HDR file

        continuous
            If set to True, will continuously output data

        rois: List[np.ndarray]
            If a list of ROIs is given, in continuous mode, cycle through
            these ROIs from the source data

        max_runs: int
            Maximum number of continuous runs
        """
        if rois is None:
            rois = []
        if not path.lower().endswith(".hdr"):
            raise ValueError("please pass the path to the HDR file!")
        self._path = path
        self._continuous = continuous
        self._rois = rois
        self._ctx = Context(executor=InlineJobExecutor())
        self._ds = None
        self._max_runs = max_runs
        self._mmaps = {}

    def open(self):
        ds = self._ctx.load("mib", path=self._path)
        print("dataset shape: %s" % (ds.shape, ))
        self._ds = ds
        self._warmup()

    def get_chunks(self):
        """
        generator of `bytes` for the given configuration
        """
        # first, send acquisition header:
        with open(self._path, 'rb') as f:
            # FIXME: possibly change header in continuous mode?
            hdr = f.read()
            yield get_mpx_header(len(hdr))
            yield hdr
        if self._continuous:
            print("yielding from continuous")
            yield from self._get_continuous()
        else:
            print("yielding from single scan")
            roi = np.ones(self._ds.shape.nav, dtype=bool)
            t = tqdm(total=np.count_nonzero(roi))
            try:
                for item in self._get_single_scan(roi):
                    yield item
                    t.update(1)
            finally:
                t.close()

    def _read_frame_w_header(self, fh, frame_idx, full_frame_size):
        """
        Parameters
        ----------

        fh : LocalFile

        frame_idx : int
            File-relative frame index

        full_frame_size : int
            Size of header plus frame in bytes
        """
        if fh._file is None:
            fh.open()
        f = fh._file
        fileno = f.fileno()
        if fileno not in self._mmaps:
            self._mmaps[fileno] = raw_mmap = mmap.mmap(
                fileno=f.fileno(),
                length=0,
                offset=0,
                access=mmap.ACCESS_READ,
            )
        else:
            raw_mmap = self._mmaps[fileno]

        return bytearray(
            raw_mmap[full_frame_size * frame_idx:full_frame_size * (frame_idx + 1)]
        )

    def _warmup(self):
        fileset = self._ds._get_fileset()
        ds_shape = self._ds.shape
        tiling_scheme = TilingScheme.make_for_shape(
            tileshape=Shape((1, ) + tuple(ds_shape.sig),
                            sig_dims=ds_shape.sig.dims),
            dataset_shape=ds_shape,
        )
        # the results are discarded on purpose: this call only exercises the
        # read-range code path once before any data is served
        slices, ranges, scheme_indices = fileset.get_read_ranges(
            start_at_frame=0,
            stop_before_frame=int(np.prod(self._ds.shape.nav)),
            dtype=np.float32,  # FIXME: don't really care...
            tiling_scheme=tiling_scheme,
            roi=None,
        )

    def _get_single_scan(self, roi):
        fileset = self._ds._get_fileset()
        ds_shape = self._ds.shape
        tiling_scheme = TilingScheme.make_for_shape(
            tileshape=Shape((1, ) + tuple(ds_shape.sig),
                            sig_dims=ds_shape.sig.dims),
            dataset_shape=ds_shape,
        )
        slices, ranges, scheme_indices = fileset.get_read_ranges(
            start_at_frame=0,
            stop_before_frame=int(np.prod(self._ds.shape.nav)),
            dtype=np.float32,  # FIXME: don't really care...
            tiling_scheme=tiling_scheme,
            roi=roi,
        )

        first_file = self._ds._files_sorted[0]
        header_size = first_file.fields['header_size_bytes']

        full_frame_size = header_size + first_file.fields['image_size_bytes']

        mpx_header = get_mpx_header(full_frame_size)

        for idx in range(slices.shape[0]):
            origin = slices[idx, 0]
            # shape = slices[idx, 1]
            # origin, shape = slices[idx]
            tile_ranges = ranges[idx][0]
            file_idx = tile_ranges[0]
            fh = fileset[file_idx]
            global_idx = origin[0]
            local_idx = global_idx - fh.start_idx
            frame_w_header = self._read_frame_w_header(fh, local_idx,
                                                       full_frame_size)
            yield mpx_header
            yield frame_w_header

    def _get_continuous(self):
        if self._rois:
            rois = self._rois
        else:
            rois = [np.ones(self._ds.shape.nav, dtype=bool)]

        i = 0
        for roi in itertools.cycle(rois):
            t0 = time.time()
            yield from self._get_single_scan(roi)
            t1 = time.time()
            print("cycle %d took %.05fs" % (i, t1 - t0))
            i += 1
            if self._max_runs != -1 and i >= self._max_runs:
                raise RuntimeError("max_runs exceeded")

    def handle_conn(self, conn):
        for chunk in self.get_chunks():
            conn.sendall(chunk)
        conn.close()
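
To show how handle_conn() ties into an actual socket, here is a minimal, hedged sketch of a single-connection TCP server around the simulator. The function name, host, port and HDR path are placeholders and not part of the original class.

import socket


def serve_once(hdr_path, host="127.0.0.1", port=6342):
    # placeholder wiring: accept one client and stream the simulated
    # acquisition to it
    sim = DataSocketSimulator(path=hdr_path, continuous=False)
    sim.open()
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server.bind((host, port))
        server.listen(1)
        conn, _addr = server.accept()
        # handle_conn() sends the acquisition header followed by the frame
        # data, then closes the connection
        sim.handle_conn(conn)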