Example #1
import numpy as np
from pyzfp import compress, decompress


def test_compress_decompress():
    a = np.linspace(0, 100, num=1000000).reshape((100, 100, 100))
    tolerance = 0.0000001
    compressed = compress(a, tolerance=tolerance)
    recovered = decompress(compressed, a.shape, a.dtype, tolerance=tolerance)
    assert (a.shape == recovered.shape)
    assert (np.allclose(a, recovered))
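In zfp's fixed-accuracy mode the tolerance bounds the absolute error of every element, which is what the allclose assertion above relies on. A minimal round-trip sketch outside the test harness, using the same pyzfp calls, that also reports the achieved compression factor:

import numpy as np
from pyzfp import compress, decompress

a = np.linspace(0, 100, num=1000000).reshape((100, 100, 100))
compressed = compress(a, tolerance=1e-7)
recovered = decompress(compressed, a.shape, a.dtype, tolerance=1e-7)

print("compression factor:", len(a.tobytes()) / len(compressed))
print("max abs error:", np.max(np.abs(a - recovered)))  # bounded by the tolerance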
Example #2
import numpy as np
from pyzfp import compress, decompress


def test_dim_order():
    a = np.arange(32, dtype=np.float32).reshape((8, 4))
    compressed = compress(a, rate=8)
    recovered = decompress(compressed[0:16], (4, 4),
                           np.dtype('float32'),
                           rate=8)
    b = np.arange(16, dtype=np.float32).reshape((4, 4))
    assert (np.allclose(recovered, b))
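The slice compressed[0:16] is valid because fixed-rate mode stores every 4x4 block of a 2-D array in exactly rate * 16 bits, so the stream can be cut on block boundaries. A quick check of that arithmetic:

rate = 8                        # bits per value
bits_per_block = rate * 4 * 4   # one 2-D zfp block covers 4x4 values
print(bits_per_block // 8)      # 16 bytes: compressed[0:16] is exactly the
                                # first block, i.e. rows 0-3 of the (8, 4) array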
Example #3
import numpy as np
import pyzfp


def pyzfp_compress(typed_column):
    """
    Compress a column of values as float32 with pyzfp and return
    the compressed bitstring.
    """
    numpy_array = np.array(typed_column, dtype=np.float32, order='C')
    # zfp caps precision at 64 bit planes, so precision=100 in effect
    # requests near-lossless compression
    compressed_bitstring = pyzfp.compress(numpy_array,
                                          precision=100,
                                          parallel=True)
    return compressed_bitstring
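Decompression needs the shape, dtype and mode parameters out of band, so a caller of pyzfp_compress has to keep them alongside the bitstring. A hypothetical round trip, assuming decompress accepts precision= the same way the other examples pass tolerance= and rate=:

import numpy as np
import pyzfp

column = np.arange(1000, dtype=np.float32)
blob = pyzfp_compress(column)
# shape/dtype/precision must match what was used for compression
restored = pyzfp.decompress(blob, column.shape, column.dtype, precision=100)
assert np.allclose(column, restored)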
Example #4
import numpy as np
from math import floor

from devito import TimeFunction
from pyzfp import compress, decompress

# NOTE: overthrust_setup and get_data are repo-local helpers assumed
# to be in scope.


def run_forward_error(filename,
                      space_order=4,
                      kernel='OT4',
                      tolerance=1e-6,
                      nbpml=10,
                      dtype=np.float64,
                      **kwargs):
    # Setup solver

    solver = overthrust_setup(filename=filename,
                              tn=2000,
                              nbpml=nbpml,
                              space_order=space_order,
                              kernel=kernel,
                              dtype=dtype,
                              **kwargs)

    nt = solver.geometry.time_axis.num
    nt_2 = int(floor(nt / 2))

    # Run for nt/2 timesteps as a warm up
    rec, u, profiler = solver.forward(time=nt_2)

    # Store last timestep

    u_comp = TimeFunction(name='u',
                          grid=solver.model.grid,
                          time_order=2,
                          space_order=solver.space_order)
    u_comp.data  # Force memory allocation
    # Compress-decompress with given tolerance

    compressed_u = compress(get_data(u), tolerance=tolerance, parallel=True)

    mem = get_data(u_comp)
    mem[:] = decompress(compressed_u,
                        mem.shape,
                        mem.dtype,
                        tolerance=tolerance)

    # Make new symbols so the data in the symbols above is not changed
    u_copy = TimeFunction(name='u',
                          grid=solver.model.grid,
                          time_order=2,
                          space_order=solver.space_order)
    u_copy.data[:] = u.data[:]
    # Uncompressed/Reference version
    _, u_original, _ = solver.forward(time_m=nt_2, time_M=nt, u=u_copy)

    u_l_copy = TimeFunction(name='u',
                            grid=solver.model.grid,
                            time_order=2,
                            space_order=solver.space_order)
    # Lossy version
    u_l_copy.data[:] = u_comp.data[:]
    _, u_lossy, _ = solver.forward(time_m=nt_2, time_M=nt, u=u_l_copy)
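get_data is a repo-local helper; a hypothetical stand-in consistent with how it is used on TimeFunction objects above and with the u._data access in a later example:

def get_data(u):
    # Hypothetical stand-in: expose the TimeFunction's full allocated
    # buffer (including halo/padding) as a writable numpy-like array.
    return u._data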
Example #5
# As above: overthrust_setup, get_data, get_all_errors and write_results
# are repo-local helpers; clear_cache comes from devito.
def run_forward_error(filename, space_order=4, kernel='OT4', tolerance=0.001,
                      nbpml=10, dtype=np.float64, **kwargs):
    # Setup solver

    solver = overthrust_setup(filename=filename, tn=1000, nbpml=nbpml,
                              space_order=space_order, kernel=kernel,
                              dtype=dtype, **kwargs)

    # Run for nt/2 timesteps as a warm up
    nt = solver.geometry.time_axis.num
    nt_2 = int(floor(nt/2))

    print("first run")
    rec, u, profiler = solver.forward(time=nt_2)
    print("second run")
    _, u2, _ = solver.forward(time=nt_2)

    assert(np.allclose(u.data, u2.data))

    # Store last timestep

    u_comp = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                          space_order=solver.space_order)
    u_comp.data  # Force memory allocation
    # Compress-decompress with given tolerance

    compressed_u = compress(get_data(u), tolerance=tolerance, parallel=True)

    mem = get_data(u_comp)
    mem[:] = decompress(compressed_u, mem.shape, mem.dtype,
                        tolerance=tolerance)

    for i in range(nt_2):
        # Run for i steps (original last time step and compressed version)
        clear_cache()
        u_copy = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                              space_order=solver.space_order)
        u_copy.data[:] = u.data
        _, u_original, _ = solver.forward(time_m=nt_2, time_M=nt_2+i, u=u_copy)

        u_l_copy = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                                space_order=solver.space_order)
        u_l_copy.data[:] = u_comp.data
        _, u_lossy, _ = solver.forward(time_m=nt_2, time_M=nt_2+i, u=u_l_copy)

        # Compare and report error metrics

        data = get_all_errors(get_data(u_original), get_data(u_lossy))
        # error_field = u_original.data[nt_2+i] - u_lossy.data[nt_2+i]
        data['ntimesteps'] = i
        data['atol'] = tolerance
        write_results(data, "forward_prop_results.csv")
Example #6
def run(tn=4000,
        space_order=4,
        kernel='OT4',
        nbpml=40,
        tolerance=1e-4,
        filename='',
        **kwargs):
    if kernel in ['OT2', 'OT4']:
        solver = overthrust_setup(filename=filename,
                                  tn=tn,
                                  nbpml=nbpml,
                                  space_order=space_order,
                                  kernel=kernel,
                                  **kwargs)
    else:
        raise ValueError("Unsupported kernel: %s" % kernel)

    total_timesteps = solver.geometry.src.time_range.num
    u = None
    rec = None
    for t in range(1, total_timesteps - 1):
        rec, u, _ = solver.forward(u=u,
                                   rec=rec,
                                   time_m=t,
                                   time_M=t,
                                   save=False)
        uncompressed = u._data[t]
        with Timer(factor=1000) as time1:
            compressed = compress(uncompressed,
                                  tolerance=tolerance,
                                  parallel=True)
        result = {
            'timestep': t,
            'cf': len(uncompressed.tobytes()) / float(len(compressed)),
            'time': time1.elapsed
        }
        write_results(result, "cf_vs_nt.csv")

    _, u2, _ = solver.forward(save=False)
    assert (u2.shape == u.shape)
    assert (np.all(np.isclose(u2.data, u.data)))
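Timer(factor=1000) is a repo-local context manager that reports elapsed time in milliseconds; a hypothetical stand-in matching the elapsed attribute used above:

import time

class Timer:
    # Hypothetical stand-in: wall-clock timer; factor=1000 yields ms.
    def __init__(self, factor=1):
        self.factor = factor

    def __enter__(self):
        self._start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.elapsed = (time.perf_counter() - self._start) * self.factor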
Example #7
import pyzfp


def zfp_compress(params, indata):
    return CompressedObject(memoryview(pyzfp.compress(indata, **params)),
                            shape=indata.shape,
                            dtype=indata.dtype)
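CompressedObject is defined elsewhere in that project; a hypothetical minimal equivalent holding the bitstream plus the out-of-band metadata needed to decompress it later:

from dataclasses import dataclass
from typing import Tuple

import numpy as np

@dataclass
class CompressedObject:
    # Hypothetical stand-in matching the fields used above.
    data: memoryview
    shape: Tuple[int, ...]
    dtype: np.dtype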
Example #8
import argparse

import h5py
import numpy as np
import pyzfp

# Minimal argument handling, reconstructed from the attribute usage below.
parser = argparse.ArgumentParser()
parser.add_argument('filename')
parser.add_argument('plot')
args = parser.parse_args()

filename = args.filename
plot = args.plot

f = h5py.File(filename, 'r')

field = f['data'][()].astype(np.float64)

tolerances = [10**x for x in range(0, -17, -1)]

error_to_plot = []

for atol in tolerances:
    print("Compressing at tolerance %s" % str(atol))
    compressed = pyzfp.compress(field, tolerance=atol)
    decompressed = pyzfp.decompress(compressed,
                                    shape=field.shape,
                                    dtype=field.dtype,
                                    tolerance=atol)

    computed_errors = {}
    computed_errors['cf'] = len(field.tobytes()) / float(len(compressed))
    for k, v in error_metrics.items():
        computed_errors[k] = v(field, decompressed)

    error_to_plot.append(computed_errors[plot])

    computed_errors['tolerance'] = atol
    write_results(computed_errors, 'direct_compression_results.csv')
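error_metrics and write_results are repo-local here; a hypothetical error_metrics in the same spirit, mapping a metric name to a function of the original and decompressed fields:

import numpy as np

error_metrics = {
    'error_norm': lambda a, b: np.linalg.norm(a - b),
    'max_error': lambda a, b: np.max(np.abs(a - b)),
    'rmse': lambda a, b: np.sqrt(np.mean((a - b) ** 2)),
}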
Example #9
# compressor, parallel and filename come from argument handling elided here
if compressor == "zfp":
    from pyzfp import compress, decompress
else:
    from pysz import compress, decompress

f = h5py.File(filename, 'r')
uncompressed = f['data'][()].astype(np.dtype('float64'))
print(
    "\"Size of compressed field\", \"Compression Factor\", \"Compression time\", \"Decompression time\", \"Tolerance\", \"Error norm\", \"Maximum error\""
)
for p_i in range(0, 17):  # go up to p_i == 16 so the 1e-16 error field below is saved
    tolerance = 0.1**p_i
    with Timer(factor=1000) as t:
        if compressor == "zfp":
            kwargs = {'parallel': parallel, 'tolerance': tolerance}
        else:
            kwargs = {'tolerance': tolerance}
        compressed = compress(uncompressed, **kwargs)

    with Timer(factor=1000) as t2:
        if compressor == "zfp":
            kwargs = {'parallel': parallel, 'tolerance': tolerance}
        else:
            kwargs = {}
        decompressed = decompress(compressed, uncompressed.shape,
                                  uncompressed.dtype, **kwargs)

    #to_hdf5(decompressed, "decompressed-t-%d.h5"%p_i)
    error_matrix = decompressed - uncompressed
    if p_i in (0, 8, 16):
        to_hdf5(error_matrix, "error_field-%s-%d.h5" % (compressor, p_i))
    print("%f, %f, %f, %f, %.16f, %f, %f" %
          (len(compressed), len(uncompressed.tostring()) /
Example #10
import os
import time

import numpy as np
import pyzfp
import tables
import twixtools

# NOTE: suppress_stdout_stderr, get_cal_data, calibrate_mtx,
# calibrate_mtx_bart, reduce_data, get_restrictions and datinfo_type are
# repo-local helpers assumed to be in scope.


def compress_twix(infile,
                  outfile,
                  remove_os=False,
                  cc_mode=False,
                  ncc=None,
                  cc_tol=0.05,
                  zfp=False,
                  zfp_tol=1e-5,
                  zfp_prec=None,
                  rm_fidnav=False):

    with suppress_stdout_stderr():
        twix = twixtools.read_twix(infile)

    filters = tables.Filters(complevel=5,
                             complib='zlib')  # lossless compression settings
    #filters = None

    mtx = None
    noise_mtx = None
    noise_dmtx = None
    if cc_mode or zfp:
        # # calibrate noise decorrelation matrix for better compression
        # noise = list()
        # for mdb in twix[1]['mdb']:
        #     if mdb.is_flag_set('NOISEADJSCAN'):
        #         noise.append(mdb.data)
        # if len(noise)>0:
        #     noise_dmtx, noise_mtx = calculate_prewhitening(np.asarray(noise).swapaxes(0,1))
        # del(noise)
        pass

    if cc_mode:
        # calibrate coil compression based on last scan in list (image scan)
        # use the calibration coil weights for all data that fits
        cal_data = get_cal_data(twix[-1], remove_os)
        if cc_mode == 'scc' or cc_mode == 'gcc':
            mtx, ncc = calibrate_mtx(cal_data, cc_mode, ncc, cc_tol)
            del (cal_data)
            print('coil compression from %d channels to %d virtual channels' %
                  (mtx.shape[-1], ncc))
        else:
            mtx = calibrate_mtx_bart(cal_data, cc_mode)
            del (cal_data)
            if ncc is None:
                # set default
                ncc = mtx.shape[-1] // 2
            print('coil compression from %d channels to %d virtual channels' %
                  (mtx.shape[-1], ncc))

    t_start = time.time()
    with tables.open_file(outfile, mode="w") as f:
        f.root._v_attrs.original_filename = os.path.basename(infile)
        f.root._v_attrs.cc_mode = cc_mode
        f.root._v_attrs.ncc = ncc
        f.root._v_attrs.zfp = zfp

        if zfp_tol is None:
            f.root._v_attrs.zfp_tol = -1
        else:
            f.root._v_attrs.zfp_tol = zfp_tol
        if zfp_prec is None:
            f.root._v_attrs.zfp_prec = -1
        else:
            f.root._v_attrs.zfp_prec = zfp_prec

        f.create_carray(f.root,
                        "multi_header",
                        obj=np.frombuffer(twix[0].tobytes(), 'S1'),
                        filters=filters)

        if mtx is not None:
            # save mtx for coil compression
            f.create_carray(f.root, "mtx", obj=mtx, filters=filters)
        if noise_dmtx is not None:
            f.create_carray(f.root,
                            "noise_dmtx",
                            obj=noise_dmtx,
                            filters=filters)
            f.create_carray(f.root,
                            "noise_mtx",
                            obj=noise_mtx,
                            filters=filters)

        scanlist = []
        for meas_key, meas in enumerate(twix[1:]):
            scanlist.append("scan%d" % (meas_key))
            grp = f.create_group("/", "scan%d" % (meas_key))
            f.create_carray(grp,
                            "hdr_str",
                            obj=meas['hdr_str'],
                            filters=filters)

            # remove fidnav scans if necessary (filter instead of deleting
            # while enumerating, which would skip entries)
            if rm_fidnav:
                meas['mdb'] = [mdb for mdb in meas['mdb']
                               if not mdb.is_flag_set('noname60')]

            mdh_count = len(meas['mdb'])

            # create info array with mdh, coil & compression information
            f.create_carray(grp,
                            "info",
                            shape=[mdh_count, datinfo_type.itemsize],
                            atom=tables.UInt8Atom(),
                            filters=filters)

            dt = tables.UInt64Atom(shape=())
            if zfp:
                f.create_vlarray(grp, "DATA", atom=dt, expectedrows=mdh_count)
            else:
                f.create_vlarray(grp,
                                 "DATA",
                                 atom=dt,
                                 filters=filters,
                                 expectedrows=mdh_count)

            syncscans = 0
            for mdb_key, mdb in enumerate(meas['mdb']):
                info = np.zeros(1, dtype=datinfo_type)[0]
                is_syncscan = mdb.is_flag_set('SYNCDATA')
                if rm_fidnav:  # we have to update the scan counters
                    if not is_syncscan:
                        # scanCounter starts at 1
                        mdb.mdh['ulScanCounter'] = mdb_key + 1 - syncscans
                    else:
                        syncscans += 1

                # store mdh
                info['mdh_info'] = mdb.mdh

                if is_syncscan or mdb.is_flag_set('ACQEND'):
                    data = np.ascontiguousarray(mdb.data).view('uint64')
                else:
                    restrictions = get_restrictions(mdb.get_flags())
                    if restrictions == 'NO_COILCOMP':
                        data, info['rm_os_active'], _ = reduce_data(
                            mdb.data, mdb.mdh, remove_os, cc_mode=False)
                    else:
                        data, info['rm_os_active'], info['cc_active'] = \
                            reduce_data(mdb.data,
                                        mdb.mdh,
                                        remove_os,
                                        cc_mode=cc_mode,
                                        mtx=mtx,
                                        ncc=ncc)
                    data = data.flatten()
                    if zfp:
                        data = pyzfp.compress(data.view('float32'),
                                              tolerance=zfp_tol,
                                              precision=zfp_prec,
                                              parallel=True)
                        data = np.frombuffer(data, dtype='uint64')
                    else:
                        data = data.view('uint64')
                    if len(mdb.channel_hdr) > 0:
                        mdb.channel_hdr[0]['ulScanCounter'] = \
                            mdb.mdh['ulScanCounter']
                        info['coil_info'] = mdb.channel_hdr[0]
                        coil_list = np.asarray(
                            [item['ulChannelId'] for item in mdb.channel_hdr],
                            dtype='uint8')
                        info['coil_list'][:len(coil_list)] = coil_list

                # write data
                grp.DATA.append(data)
                grp.info[mdb_key] = np.frombuffer(info, dtype='uint8')

        f.root._v_attrs.scanlist = scanlist

        # from joblib import Parallel, delayed
        # Parallel(n_jobs=2)(delayed(task)(mdb_key, mdb, is_byte, count, grp, remove_os, zfp, zfp_tol, zfp_prec, mtx) for mdb_key, (mdb, is_byte, count) in enumerate(zip(meas['mdb'], is_bytearray, data_counter)))

    elapsed_time = (time.time() - t_start)
    print("compression finished in %d:%02d:%02d h" %
          (elapsed_time // 3600,
           (elapsed_time % 3600) // 60, elapsed_time % 60))
    print("compression factor = %.2f" %
          (os.path.getsize(infile) / os.path.getsize(outfile)))
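The writer above encodes unset zfp parameters as -1 in the HDF5 attributes; a sketch of the matching read-back a decompressor would perform (filename hypothetical):

import tables

with tables.open_file('compressed.h5', mode='r') as f:  # hypothetical file
    zfp_tol = f.root._v_attrs.zfp_tol
    zfp_prec = f.root._v_attrs.zfp_prec
    zfp_tol = None if zfp_tol == -1 else zfp_tol      # -1 encodes "not set"
    zfp_prec = None if zfp_prec == -1 else zfp_prec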
Example #11
    # segyfile, LINE_NO and lines_to_read come from setup elided here
    line = segyfile.xline[segyfile.xlines[LINE_NO]]
    slice_segy = line.T
    lines_to_compress = np.zeros((4, line.shape[0], line.shape[1]))
    for i, line_no in enumerate(lines_to_read):
        lines_to_compress[i, :, :] = segyfile.xline[segyfile.xlines[line_no]]

bitrates = [4, 2, 1]
decompressed_slices = {}

for bits_per_voxel in bitrates:
    padded_shape = (4, pad(lines_to_compress.shape[1], 4),
                    pad(lines_to_compress.shape[2], 2048 // bits_per_voxel))
    data_padded = np.zeros(padded_shape, dtype=np.float32)
    data_padded[0:4, 0:lines_to_compress.shape[1],
                0:lines_to_compress.shape[2]] = lines_to_compress
    compressed = compress(data_padded, rate=bits_per_voxel)

    decompressed = decompress(
        compressed, (padded_shape[0], padded_shape[1], padded_shape[2]),
        np.dtype('float32'),
        rate=bits_per_voxel)

    decompressed_slices[bits_per_voxel] = decompressed[LINE_NO % 4,
                                                       0:slice_segy.shape[1],
                                                       0:slice_segy.shape[0]].T

CLIP = 45000.0
SCALE = 1.0 / (2.0 * CLIP)

from PIL import Image
im = Image.fromarray(