def create_data_file(outfile, data, chunks, nrlow, nrhigh):
    """Write *data* to a NeXus-style HDF5 file at *outfile*.

    Creates /entry (NXentry) and /entry/data (NXdata) groups, stores the
    dataset at /entry/data/data via compress_h5data() using *chunks* as the
    chunk shape, and records the image-number range in the
    image_nr_low / image_nr_high attributes.
    """
    h5 = h5py.File(outfile, "w")
    # create_group returns the new group, so the attribute can be set
    # directly on the returned object.
    for path, nxclass in (("/entry", "NXentry"), ("/entry/data", "NXdata")):
        grp = h5.create_group(path)
        grp.attrs["NX_class"] = nxclass
    dset = compress_h5data(h5, "/entry/data/data", data, chunks)
    dset.attrs["image_nr_low"] = nrlow
    dset.attrs["image_nr_high"] = nrhigh
    h5.close()
def modify_master(tmp, trg, bssid, omega_offset_by_trigger=None):
    """Post-process a temporary master HDF5 file and repack it to *trg*.

    Removes redundant per-module data, recompresses large datasets, fixes
    omega values, adds reverse-phi info, optionally rewrites the data.h5
    links for *bssid*, then runs h5repack to reclaim the freed space.
    Each fix-up step is best-effort: failures are logged and the remaining
    steps still run.

    Parameters
    ----------
    tmp : temporary master file path (modified in place, then repacked)
    trg : target path written by h5repack
    bssid : id used by fix_data_link(); falsy value skips the link fix
    omega_offset_by_trigger : optional offset passed to fix_omega()
    """
    # NOTE: original used Python-2 print statements, which are syntax
    # errors under Python 3; converted to print() calls.
    print(" Modifying master..")
    h5 = h5py.File(tmp, "a")
    try:
        redmas = ReduceMaster(h5)
        # Remove unnecessary data in
        # /entry/instrument/detector/detectorSpecific/detectorModule_*
        redmas.remove_redundant(("flatfield", "pixel_mask", "trimbit"))
        # Compress pixel_mask with bslz4 and other large data with
        # gzip+shuffle (compatible with both the Neggia plugin and autoPROC).
        redmas.compress_large_datasets("bslz4_and_gzipshuf")
    except Exception:
        print(traceback.format_exc())
    try:
        # Fix omega values if multi-trigger
        fix_omega(h5, omega_offset_by_trigger)
    except Exception:
        print(traceback.format_exc())
    # Put reverse-phi info and fix the links to data.h5
    try:
        put_reversephi_info(h5)
    except Exception:
        print(traceback.format_exc())
    if bssid:
        fix_data_link(h5, bssid)
    h5.close()
    # Run h5repack to clean up the removed space
    p = subprocess.Popen(["h5repack", tmp, trg], shell=False)
    p.wait()
def cmd_tool(args=None):
    """Command line utility for creating HDF5 blimpy files.

    Scans a directory for ``*.fil`` files and converts each one that does
    not already have a ``.h5`` sibling into a bitshuffle/LZ4-compressed
    HDF5 Filterbank file, copying the header as dataset attributes.
    """
    from argparse import ArgumentParser
    parser = ArgumentParser(description="Command line utility for creating HDF5 Filterbank files.")
    parser.add_argument('dirname', type=str, help='Name of directory to read')
    args = parser.parse_args()

    if not HAS_BITSHUFFLE:
        print("Error: the bitshuffle library is required to run this script.")
        exit()

    filelist = glob.glob(os.path.join(args.dirname, '*.fil'))

    for filename in filelist:
        if not os.path.exists(filename + '.h5'):
            t0 = time.time()
            print("\nReading %s header..." % filename)
            fb = Filterbank(filename, load_data=False)

            data_shape = (fb.n_ints_in_file, fb.header['nifs'], fb.header['nchans'])
            data_dtype = fb.data.dtype
            print(data_dtype)
            print("Creating new dataset, %s" % str(data_shape))
            block_size = 0
            h5 = h5py.File(filename + '.h5', 'w')
            h5.attrs['CLASS'] = 'FILTERBANK'

            dset = h5.create_dataset('data',
                                     shape=data_shape,
                                     compression=bitshuffle.h5.H5FILTER,
                                     compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                                     dtype=data_dtype)
            dset_mask = h5.create_dataset('mask',
                                          shape=data_shape,
                                          compression=bitshuffle.h5.H5FILTER,
                                          compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                                          dtype='uint8')

            # BUG FIX: data_shape is (n_ints, nifs, nchans), so axis 0 is
            # time and axis 2 is frequency; the previous labels had the
            # two swapped.
            dset.dims[0].label = "time"
            dset.dims[1].label = "feed_id"
            dset.dims[2].label = "frequency"

            dset_mask.dims[0].label = "time"
            dset_mask.dims[1].label = "feed_id"
            dset_mask.dims[2].label = "frequency"

            # Copy over header information as attributes
            for key, value in fb.header.items():
                dset.attrs[key] = value

            filesize = os.path.getsize(filename)

            if filesize >= MAX_SIZE:
                n_int_per_read = int(filesize / MAX_SIZE / 2)
                # FIXME(review): n_int_per_read is used both as the number
                # of reads and as the integrations-per-read stride below,
                # so only n_int_per_read**2 integrations get copied —
                # verify against fb.n_ints_in_file for very large files.
                print("Filling in with data over %i reads..." % n_int_per_read)
                for ii in range(0, n_int_per_read):
                    print("Reading %i of %i" % (ii + 1, n_int_per_read))
                    fb = Filterbank(filename, t_start=ii * n_int_per_read,
                                    t_stop=(ii + 1) * n_int_per_read)
                    dset[ii * n_int_per_read:(ii + 1) * n_int_per_read] = fb.data[:]
            else:
                fb = Filterbank(filename)
                print(dset.shape, " -> ", fb.data.shape)
                dset[:] = fb.data[:]

            h5.close()

            t1 = time.time()
            print("Conversion time: %2.2fs" % (t1 - t0))
def cmd_tool(args=None):
    """Command line tool for converting guppi raw into HDF5 versions of guppi raw.

    Globs all ``*.raw`` parts belonging to the given file's root name,
    pre-sizes one HDF5 dataset for the total number of blocks, then copies
    every block from every part into it and attaches the last block header
    as dataset attributes.
    """
    from argparse import ArgumentParser

    if not HAS_BITSHUFFLE:
        print("Error: the bitshuffle library is required to run this script.")
        exit()

    parser = ArgumentParser(
        description="Command line utility for creating HDF5 Raw files.")
    parser.add_argument('filename', type=str, help='Name of filename to read')
    args = parser.parse_args()

    fileroot = args.filename.split('.0000.raw')[0]
    filelist = sorted(glob.glob(fileroot + '*.raw'))

    # Probe the first file to learn the per-block shape and dtype.
    r = GuppiRaw(filelist[0])
    header, data = r.read_next_data_block()
    dshape = data.shape
    print(dshape)

    # Count blocks over all parts so the output dataset can be pre-sized.
    n_blocks_total = 0
    for filename in filelist:
        print(filename)
        r = GuppiRaw(filename)
        n_blocks_total += r.n_blocks
    print(n_blocks_total)

    full_dshape = np.concatenate(((n_blocks_total, ), dshape))

    # Create h5py file
    h5 = h5py.File(fileroot + '.h5', 'w')
    h5.attrs['CLASS'] = 'GUPPIRAW'
    block_size = 0  # chunk block size

    dset = h5.create_dataset(
        'data',
        shape=full_dshape,
        #compression=bitshuffle.h5.H5FILTER,
        #compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
        dtype=data.dtype)

    t_start = time.time()
    h5_idx = 0
    for filename in filelist:
        print("\nReading %s header..." % filename)
        r = GuppiRaw(filename)
        # BUG FIX: the original reopened filename + '.h5' in 'w' mode here,
        # clobbering the handle of the file that owns `dset` and creating
        # spurious per-part .h5 files; it also read (and discarded) one
        # block before this loop while still looping r.n_blocks times,
        # which skipped the first block of every file.
        for ii in range(0, r.n_blocks):
            t0 = time.time()
            print("Reading block %i of %i" % (h5_idx + 1, full_dshape[0]))
            header, data = r.read_next_data_block()
            t1 = time.time()

            t2 = time.time()
            print("Writing block %i of %i" % (h5_idx + 1, full_dshape[0]))
            dset[h5_idx, :] = data
            t3 = time.time()
            print("Read: %2.2fs, Write %2.2fs" % ((t1 - t0), (t3 - t2)))
            h5_idx += 1

    # Copy over header information as attributes
    for key, value in header.items():
        dset.attrs[key] = value

    h5.close()

    # BUG FIX: report wall time for the whole conversion, not just the
    # last block (the original reused the last block's t0 here).
    t_end = time.time()
    print("Conversion time: %2.2fs" % (t_end - t_start))
def cmd_tool(args=None):
    """Command line utility for creating HDF5 blimpy files.

    Converts every ``*.fil`` file in the given directory that lacks a
    ``.h5`` counterpart into a bitshuffle/LZ4-compressed HDF5 Filterbank
    file. Large files are filled in over multiple partial reads.
    """
    from argparse import ArgumentParser
    parser = ArgumentParser(
        description="Command line utility for creating HDF5 Filterbank files.")
    parser.add_argument('dirname', type=str, help='Name of directory to read')
    args = parser.parse_args()

    if not HAS_BITSHUFFLE:
        print("Error: the bitshuffle library is required to run this script.")
        exit()

    filelist = glob.glob(os.path.join(args.dirname, '*.fil'))

    for filename in filelist:
        if not os.path.exists(filename + '.h5'):
            t0 = time.time()
            print("\nReading %s header..." % filename)
            fb = Filterbank(filename, load_data=False)

            data_shape = (fb.n_ints_in_file, fb.header['nifs'], fb.header['nchans'])
            data_dtype = fb.data.dtype
            print(data_dtype)
            print("Creating new dataset, %s" % str(data_shape))
            block_size = 0
            h5 = h5py.File(filename + '.h5', 'w')
            h5.attrs['CLASS'] = 'FILTERBANK'

            dset = h5.create_dataset(
                'data',
                shape=data_shape,
                compression=bitshuffle.h5.H5FILTER,
                compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                dtype=data_dtype)
            dset_mask = h5.create_dataset(
                'mask',
                shape=data_shape,
                compression=bitshuffle.h5.H5FILTER,
                compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                dtype='uint8')

            # BUG FIX: with shape (n_ints, nifs, nchans) the leading axis
            # is time and the trailing axis is frequency; the labels were
            # previously assigned the other way around.
            dset.dims[0].label = "time"
            dset.dims[1].label = "feed_id"
            dset.dims[2].label = "frequency"

            dset_mask.dims[0].label = "time"
            dset_mask.dims[1].label = "feed_id"
            dset_mask.dims[2].label = "frequency"

            # Copy over header information as attributes
            for key, value in fb.header.items():
                dset.attrs[key] = value

            filesize = os.path.getsize(filename)

            if filesize >= MAX_SIZE:
                n_int_per_read = int(filesize / MAX_SIZE / 2)
                # FIXME(review): n_int_per_read doubles as both the read
                # count and the per-read stride, so the loop covers only
                # n_int_per_read**2 integrations — confirm this matches
                # fb.n_ints_in_file for oversized inputs.
                print("Filling in with data over %i reads..." % n_int_per_read)
                for ii in range(0, n_int_per_read):
                    print("Reading %i of %i" % (ii + 1, n_int_per_read))
                    fb = Filterbank(filename, t_start=ii * n_int_per_read,
                                    t_stop=(ii + 1) * n_int_per_read)
                    dset[ii * n_int_per_read:(ii + 1) * n_int_per_read] = fb.data[:]
            else:
                fb = Filterbank(filename)
                print(dset.shape, " -> ", fb.data.shape)
                dset[:] = fb.data[:]

            h5.close()

            t1 = time.time()
            print("Conversion time: %2.2fs" % (t1 - t0))