def test_compress_decompress():
    a = np.linspace(0, 100, num=1000000).reshape((100, 100, 100))
    tolerance = 0.0000001
    compressed = compress(a, tolerance=tolerance)
    recovered = decompress(compressed, a.shape, a.dtype, tolerance=tolerance)
    assert a.shape == recovered.shape
    assert np.allclose(a, recovered)
def test_dim_order():
    a = np.arange(32, dtype=np.float32).reshape((8, 4))
    compressed = compress(a, rate=8)
    # At rate=8 the stream is 8 bits per value, so the first 16 bytes hold
    # the first 16 values (the first four rows, in C order).
    recovered = decompress(compressed[0:16], (4, 4), np.dtype('float32'),
                           rate=8)
    b = np.arange(16, dtype=np.float32).reshape((4, 4))
    assert np.allclose(recovered, b)
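# A sketch of why slicing the compressed buffer works, assuming pyzfp's
# headerless fixed-rate layout: at rate=8, zfp stores each 4x4 block of
# float32 values in exactly 16 bytes, so the first half of the stream holds
# the first half of the rows. The same check on a larger array:
def test_fixed_rate_slicing_sketch():
    a = np.arange(64, dtype=np.float32).reshape((16, 4))
    compressed = compress(a, rate=8)
    assert len(compressed) == a.size  # 8 bits per value -> 1 byte per value
    half = decompress(compressed[:len(compressed) // 2], (8, 4),
                      np.dtype('float32'), rate=8)
    assert np.allclose(half, a[:8])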
def pyzfp_compress(typed_column):
    """Compress a column of numeric values with zfp in fixed-precision mode.

    The column is converted to a C-ordered float32 array before compression.
    """
    numpy_array = np.array(typed_column, dtype=np.float32, order='C')
    # zfp clamps precision internally (at most 64 bit planes), so asking for
    # 100 effectively requests maximum (near-lossless) precision.
    compressed_bitstring = pyzfp.compress(numpy_array, precision=100,
                                          parallel=True)
    return compressed_bitstring
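# A hypothetical round trip through pyzfp_compress (the input list below is
# illustrative; for a headerless zfp stream, decompression must be given the
# same precision plus the original shape and dtype, which the caller has to
# track separately):
# blob = pyzfp_compress([0.1, 0.2, 0.3, 0.4])
# restored = pyzfp.decompress(blob, (4,), np.dtype('float32'), precision=100)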
def run_forward_error(filename, space_order=4, kernel='OT4', tolerance=1e-6,
                      nbpml=10, dtype=np.float64, **kwargs):
    # Set up solver
    solver = overthrust_setup(filename=filename, tn=2000, nbpml=nbpml,
                              space_order=space_order, kernel=kernel,
                              dtype=dtype, **kwargs)
    nt = solver.geometry.time_axis.num
    nt_2 = int(floor(nt / 2))

    # Run for nt/2 timesteps as a warm-up
    rec, u, profiler = solver.forward(time=nt_2)

    # Store last timestep
    u_comp = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                          space_order=solver.space_order)
    u_comp.data  # Force memory allocation

    # Compress-decompress with given tolerance
    compressed_u = compress(get_data(u), tolerance=tolerance, parallel=True)
    mem = get_data(u_comp)
    mem[:] = decompress(compressed_u, mem.shape, mem.dtype,
                        tolerance=tolerance)

    # Make new symbols so the data in the symbols above is not changed
    u_copy = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                          space_order=solver.space_order)
    u_copy.data[:] = u.data[:]

    # Uncompressed/reference version
    _, u_original, _ = solver.forward(time_m=nt_2, time_M=nt, u=u_copy)

    u_l_copy = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                            space_order=solver.space_order)

    # Lossy version
    u_l_copy.data[:] = u_comp.data[:]
    _, u_lossy, _ = solver.forward(time_m=nt_2, time_M=nt, u=u_l_copy)
def run_forward_error(filename, space_order=4, kernel='OT4', tolerance=0.001,
                      nbpml=10, dtype=np.float64, **kwargs):
    # Set up solver
    solver = overthrust_setup(filename=filename, tn=1000, nbpml=nbpml,
                              space_order=space_order, kernel=kernel,
                              dtype=dtype, **kwargs)

    # Run for nt/2 timesteps as a warm-up
    nt = solver.geometry.time_axis.num
    nt_2 = int(floor(nt / 2))
    print("first run")
    rec, u, profiler = solver.forward(time=nt_2)
    print("second run")
    _, u2, _ = solver.forward(time=nt_2)
    assert np.allclose(u.data, u2.data)

    # Store last timestep
    u_comp = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                          space_order=solver.space_order)
    u_comp.data  # Force memory allocation

    # Compress-decompress with given tolerance
    compressed_u = compress(get_data(u), tolerance=tolerance, parallel=True)
    mem = get_data(u_comp)
    mem[:] = decompress(compressed_u, mem.shape, mem.dtype,
                        tolerance=tolerance)

    for i in range(nt_2):
        # Run for i steps (original last time step and compressed version)
        clear_cache()
        u_copy = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                              space_order=solver.space_order)
        u_copy.data[:] = u.data
        _, u_original, _ = solver.forward(time_m=nt_2, time_M=nt_2 + i,
                                          u=u_copy)

        u_l_copy = TimeFunction(name='u', grid=solver.model.grid,
                                time_order=2, space_order=solver.space_order)
        u_l_copy.data[:] = u_comp.data
        _, u_lossy, _ = solver.forward(time_m=nt_2, time_M=nt_2 + i,
                                       u=u_l_copy)

        # Compare and report error metrics
        data = get_all_errors(get_data(u_original), get_data(u_lossy))
        # error_field = u_original.data[nt_2+i] - u_lossy.data[nt_2+i]
        data['ntimesteps'] = i
        data['atol'] = tolerance
        write_results(data, "forward_prop_results.csv")
def run(tn=4000, space_order=4, kernel='OT4', nbpml=40, tolerance=1e-4,
        filename='', **kwargs):
    if kernel in ['OT2', 'OT4']:
        solver = overthrust_setup(filename=filename, tn=tn, nbpml=nbpml,
                                  space_order=space_order, kernel=kernel,
                                  **kwargs)
    else:
        raise ValueError("kernel must be one of 'OT2' or 'OT4'")

    total_timesteps = solver.geometry.src.time_range.num
    u = None
    rec = None
    for t in range(1, total_timesteps - 1):
        rec, u, _ = solver.forward(u=u, rec=rec, time_m=t, time_M=t,
                                   save=False)
        uncompressed = u._data[t]
        with Timer(factor=1000) as time1:
            compressed = compress(uncompressed, tolerance=tolerance,
                                  parallel=True)
        result = {
            'timestep': t,
            'cf': len(uncompressed.tobytes()) / float(len(compressed)),
            'time': time1.elapsed,
        }
        write_results(result, "cf_vs_nt.csv")

    _, u2, _ = solver.forward(save=False)
    assert u2.shape == u.shape
    assert np.all(np.isclose(u2.data, u.data))
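# A hypothetical driver call (the filename is a placeholder for whatever
# overthrust model file overthrust_setup expects):
# run(tn=4000, space_order=4, kernel='OT4', tolerance=1e-4,
#     filename='overthrust_3D_initial_model.h5')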
def zfp_compress(params, indata):
    return CompressedObject(memoryview(pyzfp.compress(indata, **params)),
                            shape=indata.shape, dtype=indata.dtype)
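# A minimal usage sketch (CompressedObject is assumed to be a small
# repo-defined container pairing the compressed buffer with its shape and
# dtype; the attribute names and the `field` input below are assumptions):
# params = {'tolerance': 1e-6, 'parallel': True}
# obj = zfp_compress(params, field)
# # Decompression needs the same mode parameters plus the stored metadata:
# restored = pyzfp.decompress(obj.data, obj.shape, obj.dtype, tolerance=1e-6)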
args = parser.parse_args()
filename = args.filename
plot = args.plot

f = h5py.File(filename, 'r')
field = f['data'][()].astype(np.float64)

tolerances = [10**x for x in range(0, -17, -1)]
error_to_plot = []
for atol in tolerances:
    print("Compressing at tolerance %s" % str(atol))
    compressed = pyzfp.compress(field, tolerance=atol)
    decompressed = pyzfp.decompress(compressed, shape=field.shape,
                                    dtype=field.dtype, tolerance=atol)
    computed_errors = {}
    computed_errors['cf'] = len(field.tobytes()) / float(len(compressed))
    for k, v in error_metrics.items():
        computed_errors[k] = v(field, decompressed)
    error_to_plot.append(computed_errors[plot])
    computed_errors['tolerance'] = atol
    write_results(computed_errors, 'direct_compression_results.csv')
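# `error_metrics` is defined elsewhere in this repo. A minimal sketch of
# what such a name->function mapping could contain (the metric names and
# formulas here are illustrative assumptions, not the source's definitions):
error_metrics_sketch = {
    'L2': lambda a, b: float(np.linalg.norm((a - b).ravel())),
    'Linf': lambda a, b: float(np.max(np.abs(a - b))),
    'psnr': lambda a, b: float(20 * np.log10(np.max(np.abs(a))
                               / np.sqrt(np.mean((a - b) ** 2)))),
}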
else:
    from pysz import compress, decompress

f = h5py.File(filename, 'r')
uncompressed = f['data'][()].astype(np.dtype('float64'))

print('"Size of compressed field", "Compression Factor", '
      '"Compression time", "Decompression time", "Tolerance", '
      '"Error norm", "Maximum error"')

# 17 tolerances (1 down to 1e-16), so p_i == 16 is reachable below
for p_i in range(0, 17):
    tolerance = 0.1**p_i
    with Timer(factor=1000) as t:
        if compressor == "zfp":
            kwargs = {'parallel': parallel, 'tolerance': tolerance}
        else:
            kwargs = {'tolerance': tolerance}
        compressed = compress(uncompressed, **kwargs)
    with Timer(factor=1000) as t2:
        if compressor == "zfp":
            kwargs = {'parallel': parallel, 'tolerance': tolerance}
        else:
            kwargs = {}
        decompressed = decompress(compressed, uncompressed.shape,
                                  uncompressed.dtype, **kwargs)
    # to_hdf5(decompressed, "decompressed-t-%d.h5" % p_i)
    error_matrix = decompressed - uncompressed
    if p_i in (0, 8, 16):
        to_hdf5(error_matrix, "error_field-%s-%d.h5" % (compressor, p_i))
    print("%f, %f, %f, %f, %.16f, %f, %f"
          % (len(compressed),
             len(uncompressed.tobytes()) / float(len(compressed)),
             t.elapsed, t2.elapsed, tolerance,
             np.linalg.norm(error_matrix),
             np.max(np.abs(error_matrix))))
def compress_twix(infile, outfile, remove_os=False, cc_mode=False, ncc=None,
                  cc_tol=0.05, zfp=False, zfp_tol=1e-5, zfp_prec=None,
                  rm_fidnav=False):
    with suppress_stdout_stderr():
        twix = twixtools.read_twix(infile)

    filters = tables.Filters(complevel=5, complib='zlib')  # lossless compression settings
    # filters = None

    mtx = None
    noise_mtx = None
    noise_dmtx = None

    if cc_mode or zfp:
        # # calibrate noise decorrelation matrix for better compression
        # noise = list()
        # for mdb in twix[1]['mdb']:
        #     if mdb.is_flag_set('NOISEADJSCAN'):
        #         noise.append(mdb.data)
        # if len(noise) > 0:
        #     noise_dmtx, noise_mtx = calculate_prewhitening(
        #         np.asarray(noise).swapaxes(0, 1))
        # del noise
        pass

    if cc_mode:
        # calibrate coil compression based on last scan in list (image scan);
        # use the calibration coil weights for all data that fits
        cal_data = get_cal_data(twix[-1], remove_os)
        if cc_mode == 'scc' or cc_mode == 'gcc':
            mtx, ncc = calibrate_mtx(cal_data, cc_mode, ncc, cc_tol)
            del cal_data
            print('coil compression from %d channels to %d virtual channels'
                  % (mtx.shape[-1], ncc))
        else:
            mtx = calibrate_mtx_bart(cal_data, cc_mode)
            del cal_data
            if ncc is None:
                # set default
                ncc = mtx.shape[-1] // 2
            print('coil compression from %d channels to %d virtual channels'
                  % (mtx.shape[-1], ncc))

    t_start = time.time()

    with tables.open_file(outfile, mode="w") as f:
        f.root._v_attrs.original_filename = os.path.basename(infile)
        f.root._v_attrs.cc_mode = cc_mode
        f.root._v_attrs.ncc = ncc
        f.root._v_attrs.zfp = zfp
        f.root._v_attrs.zfp_tol = -1 if zfp_tol is None else zfp_tol
        f.root._v_attrs.zfp_prec = -1 if zfp_prec is None else zfp_prec

        f.create_carray(f.root, "multi_header",
                        obj=np.frombuffer(twix[0].tobytes(), 'S1'),
                        filters=filters)

        if mtx is not None:
            # save mtx for coil compression
            f.create_carray(f.root, "mtx", obj=mtx, filters=filters)
        if noise_dmtx is not None:
            f.create_carray(f.root, "noise_dmtx", obj=noise_dmtx,
                            filters=filters)
            f.create_carray(f.root, "noise_mtx", obj=noise_mtx,
                            filters=filters)

        scanlist = []
        for meas_key, meas in enumerate(twix[1:]):
            scanlist.append("scan%d" % meas_key)
            grp = f.create_group("/", "scan%d" % meas_key)
            f.create_carray(grp, "hdr_str", obj=meas['hdr_str'],
                            filters=filters)

            # remove fidnav scans if necessary (filter into a new list rather
            # than deleting entries while iterating, which skips elements)
            if rm_fidnav:
                meas['mdb'] = [mdb for mdb in meas['mdb']
                               if not mdb.is_flag_set('noname60')]

            mdh_count = len(meas['mdb'])

            # create info array with mdh, coil & compression information
            f.create_carray(grp, "info",
                            shape=[mdh_count, datinfo_type.itemsize],
                            atom=tables.UInt8Atom(), filters=filters)

            dt = tables.UInt64Atom(shape=())
            if zfp:
                f.create_vlarray(grp, "DATA", atom=dt, expectedrows=mdh_count)
            else:
                f.create_vlarray(grp, "DATA", atom=dt, filters=filters,
                                 expectedrows=mdh_count)

            syncscans = 0
            for mdb_key, mdb in enumerate(meas['mdb']):
                info = np.zeros(1, dtype=datinfo_type)[0]
                is_syncscan = mdb.is_flag_set('SYNCDATA')
                if rm_fidnav:
                    # we have to update the scan counters
                    if not is_syncscan:
                        # scanCounter starts at 1
                        mdb.mdh['ulScanCounter'] = mdb_key + 1 - syncscans
                    else:
                        syncscans += 1

                # store mdh
                info['mdh_info'] = mdb.mdh

                if is_syncscan or mdb.is_flag_set('ACQEND'):
                    data = np.ascontiguousarray(mdb.data).view('uint64')
                else:
                    restrictions = get_restrictions(mdb.get_flags())
                    if restrictions == 'NO_COILCOMP':
                        data, info['rm_os_active'], _ = reduce_data(
                            mdb.data, mdb.mdh, remove_os, cc_mode=False)
                    else:
                        data, info['rm_os_active'], info['cc_active'] = \
                            reduce_data(mdb.data, mdb.mdh, remove_os,
                                        cc_mode=cc_mode, mtx=mtx, ncc=ncc)
                    data = data.flatten()
                    if zfp:
                        data = pyzfp.compress(data.view('float32'),
                                              tolerance=zfp_tol,
                                              precision=zfp_prec,
                                              parallel=True)
                        data = np.frombuffer(data, dtype='uint64')
                    else:
                        data = data.view('uint64')

                if len(mdb.channel_hdr) > 0:
                    mdb.channel_hdr[0]['ulScanCounter'] = \
                        mdb.mdh['ulScanCounter']
                    info['coil_info'] = mdb.channel_hdr[0]
                    coil_list = np.asarray(
                        [item['ulChannelId'] for item in mdb.channel_hdr],
                        dtype='uint8')
                    info['coil_list'][:len(coil_list)] = coil_list

                # write data
                grp.DATA.append(data)
                grp.info[mdb_key] = np.frombuffer(info, dtype='uint8')

        f.root._v_attrs.scanlist = scanlist

        # from joblib import Parallel, delayed
        # Parallel(n_jobs=2)(delayed(task)(mdb_key, mdb, is_byte, count, grp,
        #                                  remove_os, zfp, zfp_tol, zfp_prec,
        #                                  mtx)
        #                    for mdb_key, (mdb, is_byte, count) in enumerate(
        #                        zip(meas['mdb'], is_bytearray, data_counter)))

    elapsed_time = time.time() - t_start
    print("compression finished in %d:%02d:%02d h"
          % (elapsed_time // 3600, (elapsed_time % 3600) // 60,
             elapsed_time % 60))
    print("compression factor = %.2f"
          % (os.path.getsize(infile) / os.path.getsize(outfile)))
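# A hypothetical invocation of compress_twix (file names are placeholders;
# keyword values mirror the function's defaults where not stated otherwise):
# compress_twix('meas.dat', 'meas_compressed.h5',
#               remove_os=True, cc_mode='scc', ncc=16,
#               zfp=True, zfp_tol=1e-5, rm_fidnav=True)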
line = segyfile.xline[segyfile.xlines[LINE_NO]]
slice_segy = line.T

# Read each requested crossline into a 4-line slab (the loop variable is
# renamed so it does not shadow `line` above)
lines_to_compress = np.zeros((4, line.shape[0], line.shape[1]))
for i, line_no in enumerate(lines_to_read):
    lines_to_compress[i, :, :] = segyfile.xline[segyfile.xlines[line_no]]

bitrates = [4, 2, 1]
decompressed_slices = {}
for bits_per_voxel in bitrates:
    padded_shape = (4,
                    pad(lines_to_compress.shape[1], 4),
                    pad(lines_to_compress.shape[2], 2048 // bits_per_voxel))
    data_padded = np.zeros(padded_shape, dtype=np.float32)
    data_padded[0:4, 0:lines_to_compress.shape[1],
                0:lines_to_compress.shape[2]] = lines_to_compress
    compressed = compress(data_padded, rate=bits_per_voxel)
    decompressed = decompress(compressed,
                              (padded_shape[0], padded_shape[1],
                               padded_shape[2]),
                              np.dtype('float32'), rate=bits_per_voxel)
    decompressed_slices[bits_per_voxel] = decompressed[
        LINE_NO % 4, 0:slice_segy.shape[1], 0:slice_segy.shape[0]].T

CLIP = 45000.0
SCALE = 1.0 / (2.0 * CLIP)

from PIL import Image
im = Image.fromarray(
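# `pad` is assumed to be a small repo helper that rounds a dimension up to
# the next multiple, so the padded volume tiles evenly into compressed
# chunks. A minimal sketch consistent with the calls above:
def pad(orig, multiple):
    """Round `orig` up to the nearest multiple of `multiple`."""
    remainder = orig % multiple
    return orig if remainder == 0 else orig + multiple - remainder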