def write_master_file(
    fname,
    data_files,
    pixel_mask=None,
    raw_master_file=None,
    i0=None,
    compression=hdf5plugin.Bitshuffle(nelems=0, lz4=True),
):
    """Write a master HDF5 file that links to externally stored data sets.

    Args:
        fname: Path of the master file to create; opened in ``"w"`` mode, so
            an existing file is overwritten.
        data_files: Iterable of data file paths. The n-th file (counting from
            1) is exposed as ``entry/data/data_{n:06d}`` through an external
            link pointing at ``entry/data/data`` inside that file.
        pixel_mask: Optional array. When given it is cast to ``uint8`` and
            stored as ``instrument/detector/detectorSpecific/pixel_mask``
            below the entry group returned by ``create_entry_and_data``.
        raw_master_file: Optional mapping whose ``'Exptime'`` and ``'Period'``
            values support ``.item()``; they are copied to the ``exptime``
            and ``period`` attributes of the data group.
        i0: Optional data stored as ``instrument/data/i0`` below the entry
            group.
        compression: Mapping of keyword arguments forwarded to
            ``create_dataset`` for the pixel mask and ``i0`` data sets.
    """
    print(color.info(f"Writing: {fname}"))

    # The context manager guarantees the file is closed even when one of the
    # writes below raises.
    with h5py.File(fname, "w") as f:
        nxentry, nxdata = create_entry_and_data(f)

        if raw_master_file is not None:
            # Add acquisition attributes
            nxdata.attrs["exptime"] = raw_master_file['Exptime'].item()
            nxdata.attrs["period"] = raw_master_file['Period'].item()

        # Link written data sets. A dedicated loop name is used so the
        # ``fname`` parameter is not shadowed.
        for i, data_fname in enumerate(data_files, start=1):
            f[f"entry/data/data_{i:06d}"] = h5py.ExternalLink(
                data_fname, "entry/data/data")

        # Group to hold instrument specific data (created even when empty)
        grp = nxentry.create_group("instrument/data")

        # Pixel mask recognized by Albula
        if pixel_mask is not None:
            inst = nxentry.create_group("instrument/detector/detectorSpecific")
            inst.create_dataset("pixel_mask",
                                data=pixel_mask.astype(np.uint8),
                                **compression)

        if i0 is not None:
            grp.create_dataset("i0", data=i0, **compression)
def test():
    """Test for Tianlai data.

    Prompts for an HDF5 file and a precision reduction parameter, runs
    ``reduce_precision`` on the ``vis`` data set, writes the result with the
    Bitshuffle filter to ``example.bs.hdf5`` and prints compression and
    decompression throughput plus the resulting size ratio.
    """
    import os
    import time

    source_path = input('HDF5 file path: ')
    precision = float(input('Precision reduction parameter f: '))
    compressed_path = 'example.bs.hdf5'

    source_size = os.path.getsize(source_path)
    # Both throughput figures are expressed relative to the input file size.
    source_mib = source_size / 1024**2

    # Read, reduce precision and write compressed: timed as a whole.
    started = time.perf_counter()
    with h5py.File(source_path, 'r') as h5_in:
        vis = h5_in['vis'][...]
        blorder = h5_in['blorder'][...]
    reduce_precision(vis, blorder, precision / N)
    with h5py.File(compressed_path, 'w') as h5_out:
        h5_out.create_dataset('vis', data=vis, **hdf5plugin.Bitshuffle())
    finished = time.perf_counter()
    print("Throughput(compress): %f MiB/s" % (source_mib / (finished - started)))

    # Drop the in-memory copy before timing the read-back.
    del vis
    started = time.perf_counter()
    with h5py.File(compressed_path, 'r') as h5_back:
        vis = h5_back['vis'][...]
    finished = time.perf_counter()
    print("Throughput(decompress): %f MiB/s" % (source_mib / (finished - started)))

    compressed_size = os.path.getsize(compressed_path)
    print("Compression rate: %f %%" % ((compressed_size / source_size) * 100))
def h5writer(fileName, data):
    """Write a NeXus HDF5 file using h5py and numpy.

    The file contains one NXentry group (``entry_0000``) holding one NXdata
    group (``measurement``) with the Bitshuffle/LZ4 compressed ``data`` set
    and three index axes (``frame_name``, ``row_name``, ``col_name``).

    Args:
        fileName: Path of the file to create; opened in ``"w"`` mode, so an
            existing file is overwritten.
        data: Array with at least three dimensions; axis 0 is used for the
            frame axis, axis 1 for rows and axis 2 for columns.
    """
    print("Write a NeXus HDF5 file")

    timestamp = str(datetime.now())

    # create the HDF5 NeXus file
    with h5py.File(fileName, "w") as f:
        # point to the default data to be plotted; the value must be the name
        # of an existing child group, so it has to match the NXentry below
        f.attrs['default'] = u'entry_0000'
        # give the HDF5 root some more attributes
        f.attrs['file_name'] = fileName
        f.attrs['file_time'] = timestamp
        f.attrs['creator'] = u'NXdataImage.py'
        f.attrs['HDF5_Version'] = h5py.version.hdf5_version
        f.attrs['h5py_version'] = h5py.version.version

        # create the NXentry group
        nxentry = f.create_group('entry_0000')
        nxentry.attrs['NX_class'] = 'NXentry'
        # must name the NXdata group created below
        nxentry.attrs['default'] = u'measurement'
        nxentry.create_dataset('title', data=u'Lima 2D detector acquisition')

        # create the NXdata group
        nxdata = nxentry.create_group('measurement')
        nxdata.attrs['NX_class'] = u'NXdata'
        # 'signal' names the data set holding the default plottable data
        nxdata.attrs['signal'] = u'data'
        # axes of the default plot, one name per dimension of the signal
        string_dtype = h5py.special_dtype(vlen=str)
        nxdata.attrs['axes'] = numpy.array(
            ['frame_name', 'row_name', 'col_name'], dtype=string_dtype)

        # signal data
        ds = nxdata.create_dataset('data',
                                   data=data,
                                   **hdf5plugin.Bitshuffle(nelems=0, lz4=True))
        ds.attrs['interpretation'] = u'images'

        # time axis data
        ds = nxdata.create_dataset('frame_name',
                                   data=numpy.arange(data.shape[0]))
        ds.attrs['units'] = u'number'
        ds.attrs['long_name'] = u'Frame number (number)'  # suggested frame axis plot label

        # X axis data
        ds = nxdata.create_dataset(u'col_name',
                                   data=numpy.arange(data.shape[2]))
        ds.attrs['units'] = u'pixels'
        ds.attrs['long_name'] = u'Pixel Size X (pixels)'  # suggested X axis plot label

        # Y axis data
        ds = nxdata.create_dataset('row_name',
                                   data=numpy.arange(data.shape[1]))
        ds.attrs['units'] = u'pixels'
        ds.attrs['long_name'] = u'Pixel Size Y (pixels)'  # suggested Y axis plot label

    print("wrote file:", fileName)
def __write_to_hdf5_light(wf, filename_out, f_scrunch=None, *args, **kwargs):
    """ Write data to HDF5 file in one go.

    Creates a ``data`` data set holding the (optionally frequency-scrunched)
    contents of ``wf.data`` and a ``mask`` data set of the same shape with
    dtype ``uint8``; both are compressed with Bitshuffle/LZ4. Every item of
    ``wf.header`` is copied to the attributes of ``data``.

    Args:
        wf: Object providing ``data``, ``header`` and ``logger`` (presumably
            a Waterfall instance -- confirm against callers).
        filename_out (str): Name of output file
        f_scrunch (int or None): Average (scrunch) N channels together. When
            given, ``wf.header['foff']`` is multiplied by it in place.
        *args: Accepted for call compatibility and ignored.
        **kwargs: Accepted for call compatibility and ignored.
    """
    with h5py.File(filename_out, 'w') as h5:
        h5.attrs['CLASS'] = 'FILTERBANK'
        h5.attrs['VERSION'] = '1.0'

        # Build the filter description once and reuse it for both data sets.
        bitshuffle = hdf5plugin.Bitshuffle(nelems=0, lz4=True)
        bs_compression = bitshuffle['compression']
        bs_compression_opts = bitshuffle['compression_opts']

        if f_scrunch is None:
            data_out = wf.data
        else:
            wf.logger.info('Frequency scrunching by %i', f_scrunch)
            data_out = utils.rebin(wf.data, n_z=f_scrunch)
            # Channel width grows by the scrunch factor; note this mutates
            # the caller's header.
            wf.header['foff'] *= f_scrunch

        dset = h5.create_dataset('data',
                                 data=data_out,
                                 compression=bs_compression,
                                 compression_opts=bs_compression_opts)

        dset_mask = h5.create_dataset('mask',
                                      shape=data_out.shape,
                                      compression=bs_compression,
                                      compression_opts=bs_compression_opts,
                                      dtype='uint8')

        dset.dims[2].label = b"frequency"
        dset.dims[1].label = b"feed_id"
        dset.dims[0].label = b"time"

        dset_mask.dims[2].label = b"frequency"
        dset_mask.dims[1].label = b"feed_id"
        dset_mask.dims[0].label = b"time"

        # Copy over header information as attributes
        for key, value in wf.header.items():
            dset.attrs[key] = value
def main(args: Optional[List[str]] = None) -> None:
    """
    Apply the flat field data set to an images data set.

    The corrected images are written to an HDF5 file, then the input NeXus
    file is copied and its ``entry/data/data`` entry is replaced by an
    external link to the corrected images.

    Args:
        args: Input command line arguments.  If None, defaults to sys.argv[1:].
    """
    args = parser.parse_args(args)

    # Human-readable name of the chosen operation, used in the default
    # output file name.
    method_names = dict(zip(choices, ["multiplied", "divided"]))
    method = method_names[args.method]

    images_path = args.output_file or args.input_file.stem + f"_flat_field_{method}.h5"
    images_path = pathlib.Path(images_path).with_suffix(".h5")

    # Exclusive creation unless the user asked to overwrite.
    open_mode = "w" if args.force else "x"

    try:
        with h5py.File(args.input_file.with_suffix(".h5")) as source:
            with h5py.File(args.flat_field_file) as correction:
                with h5py.File(images_path, open_mode) as target:
                    with ProgressBar():
                        frames = da.from_array(source["data"])
                        flat_field = correction["image"]

                        # Multiply or divide the images by the flat-field
                        # correction; zero entries of the correction are
                        # replaced by 1.
                        operators = dict(zip(choices, [mul, truediv]))
                        apply_correction = operators[args.method]
                        divisor = np.where(flat_field, flat_field, 1)
                        frames = apply_correction(frames, divisor)
                        frames = frames.astype(np.uint32)

                        target.require_dataset(
                            "data",
                            shape=frames.shape,
                            dtype=frames.dtype,
                            chunks=frames.chunksize,
                            **hdf5plugin.Bitshuffle(),
                        )
                        frames.store(target["data"])
    except FileExistsError:
        sys.exit(file_exists.format(images_path))

    nexus_path = images_path.with_suffix(".nxs")
    if nexus_path.exists() and not args.force:
        sys.exit(file_exists.format(nexus_path))

    try:
        shutil.copy(args.input_file.with_suffix(".nxs"), nexus_path)
        with h5py.File(nexus_path, "r+") as nexus:
            # Point the copied NeXus file at the corrected images.
            del nexus["entry/data/data"]
            nexus["entry/data/data"] = h5py.ExternalLink(
                str(images_path), "data")
            nexus["entry/instrument/detector/flatfield_applied"][()] = "TRUE"
    except FileNotFoundError:
        sys.exit("Could not find input NeXus file to copy.")
def test_is_h5py_correctly_installed():
    """
    Write a Bitshuffle/LZ4 compressed float32 data set to a scratch file.

    If this test fails you probably need to install h5py from source manually:

    $ pip install --no-binary=h5py h5py
    """
    array = numpy.random.rand(100, 100, 100).astype('float32')

    # The context manager closes the file even when the compressed write
    # fails, which is exactly the failure this test is meant to surface.
    with h5py.File(tempfile.gettempdir() + '/h5testfile', "w") as f:
        dataset = f.create_dataset("data", (100, 100, 100),
                                   dtype='float32',
                                   **hdf5plugin.Bitshuffle(nelems=0, lz4=True))
        dataset[:] = array
def write_data_file(
    fname,
    data,
    image_nr_low=1,
    compression=hdf5plugin.Bitshuffle(nelems=0, lz4=True),
):
    """Write a stack of frames to the ``data`` data set of a new HDF5 file.

    The data set is resizable along its first axis and chunked one frame
    (one index along the first axis) at a time.

    Args:
        fname: Path of the file to create; opened in ``"w"`` mode, so an
            existing file is overwritten.
        data: Array of frames, stacked along the first axis.
        image_nr_low: Number assigned to the first frame. It is stored as the
            ``image_nr_low`` attribute; ``image_nr_high`` is derived from it
            and the number of frames.
        compression: Mapping of keyword arguments forwarded to
            ``create_dataset``.
    """
    print(color.info(f"Writing: {fname}"))

    # The context manager guarantees the file is closed even when the write
    # raises.
    with h5py.File(fname, "w") as f:
        _, nxdata = create_entry_and_data(f)
        frame_shape = data.shape[1:]
        ds = nxdata.create_dataset(
            "data",
            data=data,
            shape=data.shape,
            dtype=data.dtype,
            maxshape=(None, *frame_shape),
            # One chunk per frame, for any frame rank (identical to the
            # previous hard-coded form for 3-D input).
            chunks=(1, *frame_shape),
            **compression,
        )
        ds.attrs["image_nr_low"] = np.int32(image_nr_low)
        ds.attrs["image_nr_high"] = np.int32(image_nr_low + data.shape[0] - 1)
f.header.get("motor_pos").split()[f.header.get( "motor_mne").split().index("ths")]) pb = ProgressBar("Projecting frames", nframes * nb_slab, 30) jj = 0 t0 = time.perf_counter() with h5py.File( f"regrid_slab-{oversampling_phi}-{oversampling}-{oversampling}.h5", mode="w") as h: dataset = h.create_dataset("SiO2msgel3", shape=volume, dtype=numpy.float32, chunks=(slab_heigth, ) + volume[1:], **hdf5plugin.Bitshuffle()) h["oversampling_pixel"] = oversampling h["oversampling_phi"] = oversampling_phi h["kernel"] = kernel_src for slab_start in numpy.arange(0, volume[0], slab_heigth, dtype=numpy.int32): slab_end = min(slab_start + slab_heigth, volume[0]) signal_d.fill(0.0) norm_d.fill(0) for j, i in enumerate(frames): f = frames[i] if f is None:
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" __date__ = "12/08/2020" __status__ = "production" __docformat__ = 'restructuredtext' import json import numpy from .. import version from .nexus import Nexus, get_isotime try: import hdf5plugin except: cmp = {} else: cmp = hdf5plugin.Bitshuffle() def _generate_densify_script(integer): "Provide a script to densify those data" res = """#python import numpy frames = [] masked = numpy.where(numpy.logical_not(numpy.isfinite(mask))) for idx, bg in enumerate(background_avg): dense = numpy.interp(mask, radius, bg) flat = dense.ravel() start, stop = frame_ptr[idx:idx+2] flat[index[start:stop]] = intensity[start:stop]""" if integer: res += """
def __write_to_hdf5_heavy(wf, filename_out, f_scrunch=None, *args, **kwargs):
    """ Write data to HDF5 file, one blob at a time.

    Creates Bitshuffle/LZ4 compressed ``data`` and ``mask`` data sets shaped
    after ``wf.selection_shape`` (last axis divided by ``f_scrunch`` when
    given), copies ``wf.header`` to the attributes of ``data`` and then fills
    ``data`` from the blobs returned by ``wf.container.read_blob``.

    Args:
        wf: Object providing ``container``, ``header``, ``logger``, ``data``,
            ``selection_shape``, ``freq_axis``, ``time_axis`` and
            ``n_ints_in_file`` (presumably a Waterfall instance -- confirm
            against callers).
        filename_out (str): Name of output file
        f_scrunch (int or None): Average (scrunch) N channels together. When
            given, ``wf.header['foff']`` is multiplied by it in place.
        *args: Accepted for call compatibility and ignored.
        **kwargs: Accepted for call compatibility and ignored.
    """
    # Note that a chunk is not a blob!!
    # chunk_dim = wf._get_chunk_dimensions() <-- seems intended for raw to fil
    # And, chunk dimensions should not exceed the Waterfall selection shape dimensions.
    chunk_list = list(wf._get_chunk_dimensions())
    for ix, chunk_len in enumerate(chunk_list):
        if chunk_len > wf.selection_shape[ix]:
            chunk_list[ix] = wf.selection_shape[ix]
    chunk_dim = tuple(chunk_list)
    blob_dim = wf._get_blob_dimensions(chunk_dim)
    n_blobs = wf.container.calc_n_blobs(blob_dim)

    with h5py.File(filename_out, 'w') as h5:
        h5.attrs['CLASS'] = 'FILTERBANK'
        h5.attrs['VERSION'] = '1.0'

        # Build the filter description once and reuse it for both data sets.
        bitshuffle = hdf5plugin.Bitshuffle(nelems=0, lz4=True)
        bs_compression = bitshuffle['compression']
        bs_compression_opts = bitshuffle['compression_opts']

        dout_shape = list(wf.selection_shape)  # Make sure not a tuple
        dout_chunk_dim = list(chunk_dim)

        if f_scrunch is not None:
            dout_shape[-1] //= f_scrunch
            dout_chunk_dim[-1] //= f_scrunch
            # Channel width grows by the scrunch factor; note this mutates
            # the caller's header.
            wf.header['foff'] *= f_scrunch

        dset = h5.create_dataset('data',
                                 shape=tuple(dout_shape),
                                 chunks=tuple(dout_chunk_dim),
                                 compression=bs_compression,
                                 compression_opts=bs_compression_opts,
                                 dtype=wf.data.dtype)

        dset_mask = h5.create_dataset('mask',
                                      shape=tuple(dout_shape),
                                      chunks=tuple(dout_chunk_dim),
                                      compression=bs_compression,
                                      compression_opts=bs_compression_opts,
                                      dtype='uint8')

        dset.dims[2].label = b"frequency"
        dset.dims[1].label = b"feed_id"
        dset.dims[0].label = b"time"

        dset_mask.dims[2].label = b"frequency"
        dset_mask.dims[1].label = b"feed_id"
        dset_mask.dims[0].label = b"time"

        # Copy over header information as attributes
        for key, value in wf.header.items():
            dset.attrs[key] = value

        n_chans = wf.selection_shape[wf.freq_axis]
        blob_chans = blob_dim[wf.freq_axis]
        blob_ints = blob_dim[wf.time_axis]

        wf.logger.info('Using %i n_blobs to write the data.', n_blobs)

        if blob_chans < n_chans:
            # A blob covers only part of the frequency axis: several blobs
            # are needed for each block of time samples.
            for ii in range(0, n_blobs):
                wf.logger.info('Reading %i of %i', ii + 1, n_blobs)
                bob = wf.container.read_blob(blob_dim, n_blob=ii)

                #-----
                #Using channels instead of frequency.
                c_start = wf.container.chan_start_idx + ii * blob_chans
                # Floor division: the quotient counts completed passes over
                # the frequency axis and must stay an integer so that it can
                # be used as a slice bound (true division yields a float).
                t_start = wf.container.t_start + (c_start // n_chans) * blob_ints
                t_stop = t_start + blob_ints

                # Reverse array if frequency axis is flipped
#                     if self.header['foff'] < 0:
#                         c_stop = self.selection_shape[self.freq_axis] - (c_start)%self.selection_shape[self.freq_axis]
#                         c_start = c_stop - blob_dim[self.freq_axis]
#                     else:
                c_start = c_start % n_chans
                c_stop = c_start + blob_chans
                #-----

                if f_scrunch is not None:
                    c_start //= f_scrunch
                    c_stop //= f_scrunch
                    bob = utils.rebin(bob, n_z=f_scrunch)

                # The first positional argument of a logging call is the
                # format string; the values go in as lazy arguments.
                wf.logger.debug('t_start=%s t_stop=%s c_start=%s c_stop=%s',
                                t_start, t_stop, c_start, c_stop)
                dset[t_start:t_stop, 0, c_start:c_stop] = bob[:]

        else:
            # A blob spans the whole frequency axis: write full time blocks.
            for ii in range(0, n_blobs):
                wf.logger.info('Reading %i of %i', ii + 1, n_blobs)
                bob = wf.container.read_blob(blob_dim, n_blob=ii)
                t_start = wf.container.t_start + ii * blob_ints

                # This prevents issues when the last blob is smaller than the
                # others in time.
                # NOTE(review): unlike t_start, t_stop does not include
                # wf.container.t_start -- confirm this is intended when the
                # selection does not start at integration 0.
                t_stop = min((ii + 1) * blob_ints, wf.n_ints_in_file)

                if f_scrunch is not None:
                    bob = utils.rebin(bob, n_z=f_scrunch)

                dset[t_start:t_stop] = bob[:]
import hdf5plugin import h5py import fabio from .common import Nexus, get_isotime from pyFAI.detectors import Detector from pyFAI.geometry import Geometry from pyFAI.units import CONST_hc from dynamix import version as dynamix_version from dynamix.correlator import dense # Dummy factory for correlators CORRELATORS = { i: getattr(dense, i) for i in dir(dense) if i.endswith("Correlator") } COMPRESSION = hdf5plugin.Bitshuffle() class XPCS(Plugin): """This plugin does pixel correlation for XPCS and averages the signal from various bins provided in the qmask. Minimalistic example: { "plugin_name": "id02.xpcs", "data_file": "Janus_Eiger500k_raw.h5", "result_file": "Janus_Eiger500k_xpcs.h5", "sample": { "name": "FAB_PPG", "composition": "FAB_PPG425_250", "temperature": "300K" },
def test():
    """Test reduce_precision.

    Builds mock complex64 visibilities, runs ``reduce_precision`` on them,
    writes the two resulting arrays to ``test_float32.h5`` with the
    Bitshuffle filter, reads them back, checks they are unchanged and prints
    throughput and compression figures.
    """
    import os
    import time
    from numpy.random import randn

    nfreq = 5  # Number of spectral frequencies.
    nchan = 16  # Number of channels correlated.
    ntime = 1000  # Number of temporal integrations.
    precision = 0.01  # Precision reduction parameter.
    N = 100  # Number of samples integrated (delta_f*delta_t).
    T = 50  # System temperature.
    h5_path = 'test_float32.h5'

    band_pass = numpy.arange(nfreq, 2 * nfreq)**2
    gain_chan = numpy.arange(nchan, 2 * nchan)

    # Channel pairs (i, j) with j <= i, in the order they are stored.
    pairs = [(i, j) for i in range(nchan) for j in range(i + 1)]
    nprod = (nchan * (nchan + 1)) // 2
    chan_a = numpy.array([i for i, _ in pairs], numpy.int32)
    chan_b = numpy.array([j for _, j in pairs], numpy.int32)

    vis = numpy.empty((nfreq, nprod, ntime), numpy.complex64)
    for freq_idx in range(nfreq):
        for prod_idx, (i, j) in enumerate(pairs):
            amplitude = T * gain_chan[i] * gain_chan[j] * band_pass[freq_idx]
            if i == j:
                # Auto-correlation: real-valued.
                real_part = amplitude * abs(1 + randn(ntime) / numpy.sqrt(N))
                imag_part = 0
            else:
                # Cross-correlation: noise in both components.
                real_part = amplitude * randn(ntime) / numpy.sqrt(2 * N)
                imag_part = amplitude * randn(ntime) / numpy.sqrt(2 * N)
            vis[freq_idx, prod_idx] = real_part + imag_part * 1j

    # Size of the mock visibilities in bytes, used for every rate below.
    vis_nbytes = nfreq * nprod * ntime * numpy.dtype(numpy.complex64).itemsize

    # Reduce precision.
    started = time.perf_counter()
    vis_r, vis_i = reduce_precision(vis, nchan, chan_a, chan_b, precision / N)
    finished = time.perf_counter()
    rate = vis_nbytes / (finished - started)
    print("Throughput(reduce_precision): %f MiB/s" % (rate / 1024**2))

    # Compress.
    with h5py.File(h5_path, 'w') as h5_out:
        started = time.perf_counter()
        h5_out.create_dataset('vis_r', data=vis_r, **hdf5plugin.Bitshuffle())
        h5_out.create_dataset('vis_i', data=vis_i, **hdf5plugin.Bitshuffle())
        finished = time.perf_counter()
    rate = vis_nbytes / (finished - started)
    print("Throughput(bitshuffle_compress): %f MiB/s" % (rate / 1024**2))

    # Decompress.
    with h5py.File(h5_path, 'r') as h5_in:
        started = time.perf_counter()
        vis_r_back = h5_in['vis_r'][...]
        vis_i_back = h5_in['vis_i'][...]
        finished = time.perf_counter()
    rate = vis_nbytes / (finished - started)
    print("Throughput(bitshuffle_decompress): %f MiB/s" % (rate / 1024**2))

    # The round trip through the file must be lossless.
    if numpy.any(vis_r_back != vis_r) or numpy.any(vis_i_back != vis_i):
        raise ValueError('Data changed after I/O.')

    # Calculate compression rate.
    file_size = os.path.getsize(h5_path)
    rate = file_size / vis_nbytes
    print('Compression rate: %f %%' % (100 * rate))
def test(*, l=False):
    """Test dnb.reduce_precision and hdf5plugin.Bitshuffle.

    Builds mock visibilities, runs ``reduce_precision`` on them, writes the
    result to ``test_int32.h5`` with the Bitshuffle filter, reads it back,
    checks it is unchanged and prints throughput and compression figures.

    Args:
        l: When true, additionally print rounding error statistics.
    """
    import os
    from math import sqrt
    from time import perf_counter

    # Parameters.
    nchan = 16  # Number of channels correlated
    nsamples = 100  # Number of samples integrated, delta_f*delta_t
    Tsys = 50  # System temperature
    precision = 0.01  # Precision reduction parameter
    nfreq = 5  # Added dimensionality, spectral frequencies.
    ntime = 1000  # Added dimensionality, temporal integrations.
    h5_path = 'test_int32.h5'

    # Made up channel dependant gain.
    gain_chan = numpy.arange(nchan) + nchan
    # Made up frequency dependant gain.
    bandpass = (numpy.arange(nfreq) + nfreq)**2

    # Channel pairs (ii, jj) with jj >= ii, in the order they are stored.
    pairs = [(ii, jj) for ii in range(nchan) for jj in range(ii, nchan)]
    nprod = (nchan * (nchan + 1)) // 2
    chan_a = numpy.array([ii for ii, _ in pairs], numpy.int64)
    chan_b = numpy.array([jj for _, jj in pairs], numpy.int64)

    # Generate mock data. Model is pure uncorrelated receiver noise.
    # Auto correlations are a number, everything else is noise.
    vis = numpy.recarray((nfreq, nprod, ntime), DTYPE)
    for freq_idx in range(nfreq):
        for prod_idx, (ii, jj) in enumerate(pairs):
            amp = Tsys * gain_chan[ii] * gain_chan[jj] * bandpass[freq_idx]
            cell = vis[freq_idx, prod_idx]
            if ii == jj:
                cell.r = numpy.round(
                    amp * abs(1.0 + numpy.random.randn(ntime) / sqrt(nsamples)))
                cell.i = 0.0
            else:
                cell.r = numpy.round(
                    amp * numpy.random.randn(ntime) / sqrt(2 * nsamples))
                cell.i = numpy.round(
                    amp * numpy.random.randn(ntime) / sqrt(2 * nsamples))

    # Size of the mock visibilities in bytes, used for every rate below.
    vis_nbytes = nfreq * nprod * ntime * DTYPE.itemsize

    # Reduce precision.
    started = perf_counter()
    vis_rounded = reduce_precision(vis, nchan, chan_a, chan_b,
                                   precision / nsamples)
    elapsed = perf_counter() - started
    rate = vis_nbytes / elapsed
    print("Throughput(reduce_precision): %f MiB/s" % (rate / 1024**2))

    # Compress.
    with h5py.File(h5_path, 'w') as h5_out:
        started = perf_counter()
        h5_out.create_dataset('mock_data',
                              data=vis_rounded,
                              **hdf5plugin.Bitshuffle())
        elapsed = perf_counter() - started
    rate = vis_nbytes / elapsed
    print("Throughput(bitshuffle_compress): %f MiB/s" % (rate / 1024**2))

    # Decompress.
    with h5py.File(h5_path, 'r') as h5_in:
        started = perf_counter()
        vis_decompressed = h5_in['mock_data'][...]
        elapsed = perf_counter() - started
    rate = vis_nbytes / elapsed
    print("Throughput(bitshuffle_decompress): %f MiB/s" % (rate / 1024**2))

    # The round trip through the file must be lossless.
    if numpy.any(vis_rounded != vis_decompressed):
        raise ValueError('Data changed after I/O.')

    # Calculate compression rate.
    rate = os.path.getsize(h5_path) / vis_nbytes
    print('Compression rate: %f %%' % (100 * rate))

    rounding_error = (vis_rounded.r - vis.r).astype(numpy.int64)
    if not l:
        return

    print("Rounding bias:")
    print(numpy.mean(rounding_error, -1))
    print("Rounding RMS:")
    print(numpy.sqrt(numpy.mean(rounding_error**2, -1)))
    print("Relative to thermal noise:")
    print(numpy.mean(rounding_error**2, -1) / numpy.var(vis.r, -1))