Example #1
def write_master_file(
        fname,
        data_files,
        pixel_mask=None,
        raw_master_file=None,
        i0=None,
        compression=hdf5plugin.Bitshuffle(nelems=0, lz4=True),
):
    print(color.info(f"Writing: {fname}"))
    f = h5py.File(fname, "w")
    nxentry, nxdata = create_entry_and_data(f)

    if raw_master_file is not None:
        # Add acquisition attributes from the raw master file
        nxdata.attrs["exptime"] = raw_master_file['Exptime'].item()
        nxdata.attrs["period"] = raw_master_file['Period'].item()

    # Link written data sets:
    for i, data_fname in enumerate(data_files, start=1):
        f[f"entry/data/data_{i:06d}"] = h5py.ExternalLink(
            data_fname, "entry/data/data")

    # Group to hold instrument-specific data
    grp = nxentry.create_group("instrument/data")

    # Pixel mask recognized by Albula
    if pixel_mask is not None:
        inst = nxentry.create_group("instrument/detector/detectorSpecific")
        inst.create_dataset("pixel_mask",
                            data=pixel_mask.astype(np.uint8),
                            **compression)

    if i0 is not None:
        grp.create_dataset("i0", data=i0, **compression)
    f.close()
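A minimal usage sketch, assuming the linked files were produced by write_data_file (Example #7); the file names and mask shape are made up:

import numpy as np

mask = np.zeros((512, 1024), dtype=bool)  # hypothetical mask: no bad pixels
write_master_file(
    "run_master.h5",
    data_files=["run_data_000001.h5", "run_data_000002.h5"],
    pixel_mask=mask,
)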
Example #2
def test():
    """Test for Tianlai data."""

    import os
    import time

    example = input('HDF5 file path: ')
    f = float(input('Precision reduction parameter f: '))

    fsize = os.path.getsize(example)

    N = 100  # number of samples integrated (assumed; N is undefined in the original snippet)
    t_s = time.perf_counter()
    with h5py.File(example, 'r') as df:
        vis, blorder = df['vis'][...], df['blorder'][...]
    reduce_precision(vis, blorder, f / N)
    with h5py.File('example.bs.hdf5', 'w') as df:
        df.create_dataset('vis', data=vis, **hdf5plugin.Bitshuffle())
    t_e = time.perf_counter()

    print("Throughput(compress): %f MiB/s" % ((fsize / 1024**2) / (t_e - t_s)))

    del vis

    t_s = time.perf_counter()
    with h5py.File('example.bs.hdf5', 'r') as df:
        vis = df['vis'][...]
    t_e = time.perf_counter()

    print("Throughput(decompress): %f MiB/s" % ((fsize / 1024**2) /
                                                (t_e - t_s)))

    bs_fsize = os.path.getsize('example.bs.hdf5')
    print("Compression rate: %f %%" % ((bs_fsize / fsize) * 100))
Example #3
File: h5reader.py Project: kif/dynamix
def h5writer(fileName, data):
    '''Writes a NeXus HDF5 file using h5py and numpy'''
    print("Write a NeXus HDF5 file")

    timestamp = str(datetime.now())

    # create the HDF5 NeXus file
    with h5py.File(fileName, "w") as f:
        # point to the default data to be plotted
        f.attrs['default'] = u'entry'
        # give the HDF5 root some more attributes
        f.attrs['file_name'] = fileName
        f.attrs['file_time'] = timestamp
        f.attrs['creator'] = u'NXdataImage.py'
        f.attrs['HDF5_Version'] = h5py.version.hdf5_version
        f.attrs['h5py_version'] = h5py.version.version

        # create the NXentry group
        nxentry = f.create_group('entry_0000')
        nxentry.attrs['NX_class'] = 'NXentry'
        nxentry.attrs['default'] = u'image_plot'
        nxentry.create_dataset('title', data=u'Lima 2D detector acquisition')

        # create the NXdata group
        nxdata = nxentry.create_group('measurement')
        nxdata.attrs['NX_class'] = u'NXdata'
        nxdata.attrs['signal'] = u'data'  # name of the default signal dataset to plot
        string_dtype = h5py.special_dtype(vlen=str)
        nxdata.attrs['axes'] = numpy.array(
            ['frame_name', 'row_name', 'col_name'],
            dtype=string_dtype)  # X axis of default plot

        # signal data
        ds = nxdata.create_dataset('data',
                                   data=data,
                                   **hdf5plugin.Bitshuffle(nelems=0, lz4=True))
        ds.attrs['interpretation'] = u'images'

        # time axis data
        ds = nxdata.create_dataset('frame_name',
                                   data=numpy.arange(data.shape[0]))
        ds.attrs['units'] = u'number'
        ds.attrs[
            'long_name'] = u'Frame number (number)'  # suggested Y axis plot label

        # X axis data
        ds = nxdata.create_dataset(u'col_name',
                                   data=numpy.arange(data.shape[2]))
        ds.attrs['units'] = u'pixels'
        ds.attrs[
            'long_name'] = u'Pixel Size X (pixels)'  # suggested X axis plot label

        # Y axis data
        ds = nxdata.create_dataset('row_name',
                                   data=numpy.arange(data.shape[1]))
        ds.attrs['units'] = u'pixels'
        ds.attrs[
            'long_name'] = u'Pixel Size Y (pixels)'  # suggested Y axis plot label

    print("wrote file:", fileName)
Example #4
def __write_to_hdf5_light(wf, filename_out, f_scrunch=None, *args, **kwargs):
    """ Write data to HDF5 file in one go.

    Args:
        wf (Waterfall): Waterfall data object to write out
        filename_out (str): Name of output file
        f_scrunch (int or None): Average (scrunch) N channels together
    """

    with h5py.File(filename_out, 'w') as h5:

        h5.attrs['CLASS']   = 'FILTERBANK'
        h5.attrs['VERSION'] = '1.0'

        bs_compression = hdf5plugin.Bitshuffle(nelems=0, lz4=True)['compression']
        bs_compression_opts = hdf5plugin.Bitshuffle(nelems=0, lz4=True)['compression_opts']

        if f_scrunch is None:
            data_out = wf.data
        else:
            wf.logger.info('Frequency scrunching by %i' % f_scrunch)
            data_out = utils.rebin(wf.data, n_z=f_scrunch)
            wf.header['foff'] *= f_scrunch

        dset = h5.create_dataset('data',
                                 data=data_out,
                                 compression=bs_compression,
                                 compression_opts=bs_compression_opts)

        dset_mask = h5.create_dataset('mask',
                                      shape=data_out.shape,
                                      compression=bs_compression,
                                      compression_opts=bs_compression_opts,
                                      dtype='uint8')

        dset.dims[2].label = b"frequency"
        dset.dims[1].label = b"feed_id"
        dset.dims[0].label = b"time"

        dset_mask.dims[2].label = b"frequency"
        dset_mask.dims[1].label = b"feed_id"
        dset_mask.dims[0].label = b"time"

        # Copy over header information as attributes
        for key, value in wf.header.items():
            dset.attrs[key] = value
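Extracting 'compression' and 'compression_opts' one key at a time works, but the Bitshuffle mapping can also be unpacked in a single step, as most of the other examples here do; an equivalent sketch:

dset = h5.create_dataset('data',
                         data=data_out,
                         **hdf5plugin.Bitshuffle(nelems=0, lz4=True))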
Example #5
def main(args: Optional[List[str]] = None) -> None:
    """
    Apply the flat field data set to an images data set.

    Args:
        args:  Input command line arguments.  If None, defaults to sys.argv[1:].
    """
    args = parser.parse_args(args)

    method = dict(zip(choices, ["multiplied", "divided"]))[args.method]
    output_file = args.output_file or args.input_file.stem + f"_flat_field_{method}.h5"
    output_file = pathlib.Path(output_file).with_suffix(".h5")
    write_mode = "w" if args.force else "x"

    try:
        with h5py.File(args.input_file.with_suffix(".h5")) as f, h5py.File(
                args.flat_field_file) as g, h5py.File(
                    output_file, write_mode) as h, ProgressBar():
            images = da.from_array(f["data"])
            flat_field = g["image"]
            # Multiply or divide the images by the flat-field correction.
            func = dict(zip(choices, [mul, truediv]))[args.method]
            images = func(images, np.where(flat_field, flat_field, 1))
            images = images.astype(np.uint32)
            h.require_dataset(
                "data",
                shape=images.shape,
                dtype=images.dtype,
                chunks=images.chunksize,
                **hdf5plugin.Bitshuffle(),
            )
            images.store(h["data"])
    except FileExistsError:
        sys.exit(file_exists.format(output_file))

    output_nexus = output_file.with_suffix(".nxs")
    if output_nexus.exists() and not args.force:
        sys.exit(file_exists.format(output_nexus))
    else:
        try:
            shutil.copy(args.input_file.with_suffix(".nxs"), output_nexus)
            with h5py.File(output_nexus, "r+") as f:
                del f["entry/data/data"]
                f["entry/data/data"] = h5py.ExternalLink(
                    str(output_file), "data")
                f["entry/instrument/detector/flatfield_applied"][()] = "TRUE"
        except FileNotFoundError:
            sys.exit("Could not find input NeXus file to copy.")
Example #6
def test_is_h5py_correctly_installed():
    """
    If this test fails you probably need to install h5py from source manually:

    $ pip install --no-binary=h5py h5py
    """
    import tempfile

    f = h5py.File(tempfile.gettempdir() + '/h5testfile', "w")
    dataset = f.create_dataset("data", (100, 100, 100),
                               dtype='float32',
                               **hdf5plugin.Bitshuffle(nelems=0, lz4=True))

    array = numpy.random.rand(100, 100, 100)
    array = array.astype('float32')
    dataset[:] = array
    f.close()
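A read-back check makes the round trip explicit; continuing from the test above:

with h5py.File(tempfile.gettempdir() + '/h5testfile', 'r') as f:
    assert numpy.array_equal(f['data'][...], array)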
Example #7
def write_data_file(
        fname,
        data,
        image_nr_low=1,
        compression=hdf5plugin.Bitshuffle(nelems=0, lz4=True),
):
    print(color.info(f"Writing: {fname}"))
    f = h5py.File(fname, "w")
    nxentry, nxdata = create_entry_and_data(f)
    ds = nxdata.create_dataset(
        "data",
        data=data,
        shape=data.shape,
        dtype=data.dtype,
        maxshape=(None, *data.shape[1:]),
        chunks=(1, data.shape[1], data.shape[2]),
        **compression,
    )
    ds.attrs["image_nr_low"] = np.int32(image_nr_low)
    ds.attrs["image_nr_high"] = np.int32(image_nr_low + data.shape[0] - 1)
    f.close()
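A usage sketch pairing this writer with Example #1; the shape, dtype and file name are made up:

import numpy as np

frames = np.random.randint(0, 1000, size=(100, 512, 1024)).astype(np.uint32)
write_data_file("run_data_000001.h5", frames, image_nr_low=1)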
Example #8
        f.header.get("motor_pos").split()[f.header.get(
            "motor_mne").split().index("ths")])


pb = ProgressBar("Projecting frames", nframes * nb_slab, 30)
jj = 0
t0 = time.perf_counter()

with h5py.File(
        f"regrid_slab-{oversampling_phi}-{oversampling}-{oversampling}.h5",
        mode="w") as h:
    dataset = h.create_dataset("SiO2msgel3",
                               shape=volume,
                               dtype=numpy.float32,
                               chunks=(slab_height, ) + volume[1:],
                               **hdf5plugin.Bitshuffle())
    h["oversampling_pixel"] = oversampling
    h["oversampling_phi"] = oversampling_phi
    h["kernel"] = kernel_src

    for slab_start in numpy.arange(0,
                                   volume[0],
                                   slab_height,
                                   dtype=numpy.int32):
        slab_end = min(slab_start + slab_height, volume[0])
        signal_d.fill(0.0)
        norm_d.fill(0)

        for j, i in enumerate(frames):
            f = frames[i]
            if f is None:
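The excerpt above is truncated, but its chunking scheme, slab-height chunks filled slab by slab, stands on its own; a minimal sketch with made-up dimensions:

import numpy
import h5py
import hdf5plugin

volume = (64, 128, 128)
slab_height = 8
with h5py.File("slab_sketch.h5", mode="w") as h:
    ds = h.create_dataset("vol",
                          shape=volume,
                          dtype=numpy.float32,
                          chunks=(slab_height, ) + volume[1:],
                          **hdf5plugin.Bitshuffle())
    for start in range(0, volume[0], slab_height):
        end = min(start + slab_height, volume[0])
        ds[start:end] = numpy.random.random((end - start, ) + volume[1:])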
Example #9
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "12/08/2020"
__status__ = "production"
__docformat__ = 'restructuredtext'

import json
import numpy
from .. import version
from .nexus import Nexus, get_isotime

try:
    import hdf5plugin
except ImportError:
    cmp = {}
else:
    cmp = hdf5plugin.Bitshuffle()


def _generate_densify_script(integer):
    "Provide a script to densify those data"
    res = """#python
import numpy
frames = []
masked = numpy.where(numpy.logical_not(numpy.isfinite(mask)))
for idx, bg in enumerate(background_avg):
    dense = numpy.interp(mask, radius, bg)
    flat = dense.ravel()
    start, stop = frame_ptr[idx:idx+2]
    flat[index[start:stop]] = intensity[start:stop]"""
    if integer:
        res += """
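The cmp mapping then drops straight into create_dataset, so the same writer code runs whether or not hdf5plugin is installed; a sketch of the intended use, with f an open h5py.File and intensity an array:

f.create_dataset("intensity", data=intensity, **cmp)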
Example #10
def __write_to_hdf5_heavy(wf, filename_out, f_scrunch=None, *args, **kwargs):
    """ Write data to HDF5 file.

    Args:
        wf (Waterfall): Waterfall data object to write out
        filename_out (str): Name of output file
        f_scrunch (int or None): Average (scrunch) N channels together
    """

    # Note that a chunk is not a blob!!
    # chunk_dim = wf._get_chunk_dimensions() <-- seems intended for raw to fil
    # And, chunk dimensions should not exceed the Waterfall selection shape dimensions.
    chunk_list = list(wf._get_chunk_dimensions())
    for ix in range(0, len(chunk_list)):
        if chunk_list[ix] > wf.selection_shape[ix]:
            chunk_list[ix] = wf.selection_shape[ix]
    chunk_dim = tuple(chunk_list)
    blob_dim  = wf._get_blob_dimensions(chunk_dim)
    n_blobs   = wf.container.calc_n_blobs(blob_dim)

    with h5py.File(filename_out, 'w') as h5:

        h5.attrs['CLASS'] = 'FILTERBANK'
        h5.attrs['VERSION'] = '1.0'

        bs_compression = hdf5plugin.Bitshuffle(nelems=0, lz4=True)['compression']
        bs_compression_opts = hdf5plugin.Bitshuffle(nelems=0, lz4=True)['compression_opts']

        dout_shape     = list(wf.selection_shape)    # Make sure not a tuple
        dout_chunk_dim = list(chunk_dim)

        if f_scrunch is not None:
            dout_shape[-1] //= f_scrunch
            dout_chunk_dim[-1] //= f_scrunch
            wf.header['foff'] *= f_scrunch

        dset = h5.create_dataset('data',
                                 shape=tuple(dout_shape),
                                 chunks=tuple(dout_chunk_dim),
                                 compression=bs_compression,
                                 compression_opts=bs_compression_opts,
                                 dtype=wf.data.dtype)

        dset_mask = h5.create_dataset('mask',
                                      shape=tuple(dout_shape),
                                      chunks=tuple(dout_chunk_dim),
                                      compression=bs_compression,
                                      compression_opts=bs_compression_opts,
                                      dtype='uint8')

        dset.dims[2].label = b"frequency"
        dset.dims[1].label = b"feed_id"
        dset.dims[0].label = b"time"

        dset_mask.dims[2].label = b"frequency"
        dset_mask.dims[1].label = b"feed_id"
        dset_mask.dims[0].label = b"time"

        # Copy over header information as attributes
        for key, value in wf.header.items():
            dset.attrs[key] = value

        if blob_dim[wf.freq_axis] < wf.selection_shape[wf.freq_axis]:

            wf.logger.info('Using %i n_blobs to write the data.'% n_blobs)
            for ii in range(0, n_blobs):
                wf.logger.info('Reading %i of %i' % (ii + 1, n_blobs))

                bob = wf.container.read_blob(blob_dim, n_blob=ii)

                #-----
                #Using channels instead of frequency.
                c_start = wf.container.chan_start_idx + ii * blob_dim[wf.freq_axis]
                # Floor division: t_start is an array index (implicit integer division in the original Python 2 code).
                t_start = wf.container.t_start + (c_start // wf.selection_shape[wf.freq_axis]) * blob_dim[wf.time_axis]
                t_stop = t_start + blob_dim[wf.time_axis]

                # Reverse array if frequency axis is flipped
#                     if self.header['foff'] < 0:
#                         c_stop = self.selection_shape[self.freq_axis] - (c_start)%self.selection_shape[self.freq_axis]
#                         c_start = c_stop - blob_dim[self.freq_axis]
#                     else:
                c_start = (c_start) % wf.selection_shape[wf.freq_axis]
                c_stop = c_start + blob_dim[wf.freq_axis]
                #-----

                if f_scrunch is not None:
                    c_start //= f_scrunch
                    c_stop  //= f_scrunch
                    bob = utils.rebin(bob, n_z=f_scrunch)

                wf.logger.debug("t=%s:%s c=%s:%s", t_start, t_stop, c_start, c_stop)
                dset[t_start:t_stop,0,c_start:c_stop] = bob[:]

        else:

            wf.logger.info('Using %i n_blobs to write the data.'% n_blobs)
            for ii in range(0, n_blobs):
                wf.logger.info('Reading %i of %i' % (ii + 1, n_blobs))

                bob = wf.container.read_blob(blob_dim, n_blob=ii)
                t_start = wf.container.t_start + ii * blob_dim[wf.time_axis]

                #This prevents issues when the last blob is smaller than the others in time
                if (ii+1)*blob_dim[wf.time_axis] > wf.n_ints_in_file:
                    t_stop = wf.n_ints_in_file
                else:
                    t_stop = (ii+1)*blob_dim[wf.time_axis]

                if f_scrunch is not None:
                    bob = utils.rebin(bob, n_z=f_scrunch)

                dset[t_start:t_stop] = bob[:]
Example #11
File: xpcs.py Project: kif/dahu
import hdf5plugin
import h5py
import fabio
from .common import Nexus, get_isotime
from pyFAI.detectors import Detector
from pyFAI.geometry import Geometry
from pyFAI.units import CONST_hc
from dynamix import version as dynamix_version
from dynamix.correlator import dense
# Dummy factory for correlators
CORRELATORS = {
    i: getattr(dense, i)
    for i in dir(dense) if i.endswith("Correlator")
}
COMPRESSION = hdf5plugin.Bitshuffle()


class XPCS(Plugin):
    """This plugin does pixel correlation for XPCS and averages the signal from various bins provided in the qmask. 

Minimalistic example:
{
    "plugin_name": "id02.xpcs",
    "data_file": "Janus_Eiger500k_raw.h5",
    "result_file": "Janus_Eiger500k_xpcs.h5",
    "sample": {
        "name": "FAB_PPG",
        "composition": "FAB_PPG425_250",
        "temperature": "300K"
    },
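The dict comprehension above builds a name-to-class lookup for everything dynamix.correlator.dense exports under a name ending in "Correlator"; a usage sketch, with the key name assumed rather than taken from the dynamix API:

correlator_class = CORRELATORS["MatMulCorrelator"]  # hypothetical key; any exported *Correlator name works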
Example #12
def test():
    """Test reduce_precision."""

    import time
    from numpy.random import randn

    nfreq = 5  # Number of spectral frequencies.
    nchan = 16  # Number of channels correlated.
    ntime = 1000  # Number of temporal integrations.

    f = 0.01  # Precision reduction parameter.
    N = 100  # Number of samples integrated (delta_f*delta_t).

    T = 50  # System temperature.

    band_pass = numpy.arange(nfreq, 2 * nfreq)**2
    gain_chan = numpy.arange(nchan, 2 * nchan)

    nprod = (nchan * (nchan + 1)) // 2
    vis = numpy.empty((nfreq, nprod, ntime), numpy.complex64)
    chan_a = numpy.empty(nprod, numpy.int32)
    chan_b = numpy.empty(nprod, numpy.int32)

    k = 0
    for i in range(nchan):
        for j in range(i + 1):
            chan_a[k], chan_b[k] = i, j
            k += 1

    for k0 in range(nfreq):
        k1 = 0
        for i in range(nchan):
            for j in range(i + 1):
                A = T * gain_chan[i] * gain_chan[j] * band_pass[k0]
                if (i == j):
                    vis_r = A * abs(1 + randn(ntime) / numpy.sqrt(N))
                    vis_i = 0
                else:
                    vis_r = A * randn(ntime) / numpy.sqrt(2 * N)
                    vis_i = A * randn(ntime) / numpy.sqrt(2 * N)
                vis[k0, k1] = vis_r + vis_i * 1j
                k1 += 1

    # Reduce precision.
    t_s = time.perf_counter()
    vis_r, vis_i = reduce_precision(vis, nchan, chan_a, chan_b, f / N)
    t_e = time.perf_counter()

    rate = nfreq * nprod * ntime * numpy.dtype(
        numpy.complex64).itemsize / (t_e - t_s)
    print("Throughput(reduce_precision): %f MiB/s" % (rate / 1024**2))

    # Compress.
    with h5py.File('test_float32.h5', 'w') as h5f:  # h5f avoids shadowing the precision parameter f
        t_s = time.perf_counter()
        h5f.create_dataset('vis_r', data=vis_r, **hdf5plugin.Bitshuffle())
        h5f.create_dataset('vis_i', data=vis_i, **hdf5plugin.Bitshuffle())
        t_e = time.perf_counter()

    rate = nfreq * nprod * ntime * numpy.dtype(
        numpy.complex64).itemsize / (t_e - t_s)
    print("Throughput(bitshuffle_compress): %f MiB/s" % (rate / 1024**2))

    # Decompress.
    with h5py.File('test_float32.h5', 'r') as h5f:
        t_s = time.perf_counter()
        vis_r_ = h5f['vis_r'][...]
        vis_i_ = h5f['vis_i'][...]
        t_e = time.perf_counter()

    rate = nfreq * nprod * ntime * numpy.dtype(
        numpy.complex64).itemsize / (t_e - t_s)
    print("Throughput(bitshuffle_decompress): %f MiB/s" % (rate / 1024**2))

    if numpy.any(vis_r_ != vis_r) or numpy.any(vis_i_ != vis_i):
        raise ValueError('Data changed after I/O.')

    # Calculate compression rate.
    import os
    fsize = os.path.getsize('test_float32.h5')
    rate = fsize / (nfreq * nprod * ntime *
                    numpy.dtype(numpy.complex64).itemsize)
    print('Compression rate: %f %%' % (100 * rate))
Example #13
File: test_int32.py Project: fzyzcjy/h5bs
def test(*, verbose=False):
    """Test dnb.reduce_precision and hdf5plugin.Bitshuffle."""

    from math import sqrt
    from time import perf_counter

    # Parameters.
    nchan = 16  # Number of channels correlated
    nsamples = 100  # Number of samples integrated, delta_f*delta_t
    Tsys = 50  # System temperature
    f = 0.01  # Precision reduction parameter
    nfreq = 5  # Added dimensionality, spectral frequencies.
    ntime = 1000  # Added dimensionality, temporal integrations.

    # Made up channel dependant gain.
    gain_chan = numpy.arange(nchan) + nchan
    # Made up frequency dependant gain.
    bandpass = (numpy.arange(nfreq) + nfreq)**2

    # Generate mock data. Model is pure uncorrelated receiver noise.
    # Auto correlations are a number, everything else is noise.
    nprod = (nchan * (nchan + 1)) // 2
    vis = numpy.recarray((nfreq, nprod, ntime), DTYPE)
    chan_a = numpy.empty(nprod, numpy.int64)
    chan_b = numpy.empty(nprod, numpy.int64)

    for ff in range(nfreq):
        kk = 0
        for ii in range(nchan):
            for jj in range(ii, nchan):
                chan_a[kk] = ii
                chan_b[kk] = jj

                amp = Tsys * gain_chan[ii] * gain_chan[jj] * bandpass[ff]
                if (ii == jj):
                    vis[ff, kk].r = numpy.round(
                        amp *
                        abs(1.0 + numpy.random.randn(ntime) / sqrt(nsamples)))
                    vis[ff, kk].i = 0.0
                else:
                    vis[ff, kk].r = numpy.round(
                        amp * numpy.random.randn(ntime) / sqrt(2 * nsamples))
                    vis[ff, kk].i = numpy.round(
                        amp * numpy.random.randn(ntime) / sqrt(2 * nsamples))
                kk += 1

    # Reduce precision.
    t0 = perf_counter()
    vis_rounded = reduce_precision(vis, nchan, chan_a, chan_b, f / nsamples)
    t = perf_counter() - t0

    rate = nfreq * nprod * ntime * DTYPE.itemsize / t
    print("Throughput(reduce_precision): %f MiB/s" % (rate / 1024**2))

    # Compress.
    with h5py.File('test_int32.h5', 'w') as h5f:  # h5f avoids shadowing the precision parameter f
        t0 = perf_counter()
        h5f.create_dataset('mock_data',
                           data=vis_rounded,
                           **hdf5plugin.Bitshuffle())
        t = perf_counter() - t0

    rate = nfreq * nprod * ntime * DTYPE.itemsize / t
    print("Throughput(bitshuffle_compress): %f MiB/s" % (rate / 1024**2))

    # Decompress.
    with h5py.File('test_int32.h5', 'r') as h5f:
        t0 = perf_counter()
        vis_decompressed = h5f['mock_data'][...]
        t = perf_counter() - t0

    rate = nfreq * nprod * ntime * DTYPE.itemsize / t
    print("Throughput(bitshuffle_decompress): %f MiB/s" % (rate / 1024**2))

    if numpy.any(vis_rounded != vis_decompressed):
        raise ValueError('Data changed after I/O.')

    # Calculate compression rate.
    import os
    rate = os.path.getsize('test_int32.h5') / (nfreq * nprod * ntime *
                                               DTYPE.itemsize)
    print('Compression rate: %f %%' % (100 * rate))

    rounding_error = (vis_rounded.r - vis.r).astype(numpy.int64)
    if verbose:
        print("Rounding bias:")
        print(numpy.mean(rounding_error, -1))
        print("Rounding RMS:")
        print(numpy.sqrt(numpy.mean(rounding_error**2, -1)))
        print("Relative to thermal noise:")
        print(numpy.mean(rounding_error**2, -1) / numpy.var(vis.r, -1))