Example #1
0
def test_comp_decomp(path):
    """Compress then decompress a random integer-typed binary file and check that
    the round trip is lossless at the byte level (this would not hold for
    floating-point data types)."""
    data = np.array(np.random.randint(low=0, high=255, size=(1000, 1000)), dtype=np.int16).T
    _write_arr(path, data)
    compressed_path = path.parent / 'data.cbin'
    meta_path = path.parent / 'data.ch'
    compress(
        path, compressed_path, meta_path, sample_rate=sample_rate,
        n_channels=data.shape[1], dtype=data.dtype,
    )
    decompressed_path = path.with_suffix('.decomp.bin')
    decompress(compressed_path, meta_path, out=decompressed_path)

    # Byte-for-byte comparison of the original and round-tripped files.
    buf_orig = path.read_bytes()
    sha1_original = sha1(buf_orig)
    buf_decomp = decompressed_path.read_bytes()
    sha1_decompressed = sha1(buf_decomp)
    assert buf_orig == buf_decomp

    # The metadata file must record SHA1s matching the actual files.
    sha1_compressed = sha1(compressed_path.read_bytes())
    metadata = json.loads(meta_path.read_text())

    assert metadata['sha1_compressed'] == sha1_compressed
    assert metadata['sha1_uncompressed'] == sha1_decompressed == sha1_original
Example #2
0
def _round_trip(path, arr, **ckwargs):
    """Write `arr` to disk, compress it (forwarding `ckwargs` to compress()),
    decompress it, check the result matches the original array, and return the
    decompressed reader."""
    _write_arr(path, arr)
    compressed_path = path.parent / 'data.cbin'
    meta_path = path.parent / 'data.ch'
    compress(
        path, compressed_path, meta_path, sample_rate=sample_rate,
        n_channels=arr.shape[1], dtype=arr.dtype, **ckwargs)
    decompressed = decompress(compressed_path, meta_path)
    assert np.allclose(decompressed[:], arr)
    return decompressed
Example #3
0
def traces(request, tempdir, arr, sample_rate):
    """Parametrized fixture building an ephys reader for `arr` in the on-disk
    (or in-memory) format selected by `request.param`."""
    kind = request.param

    if kind == 'numpy':
        # In-memory array, no file involved.
        return get_ephys_reader(arr, sample_rate=sample_rate)

    if kind == 'npy':
        # Single .npy file.
        npy_path = tempdir / 'data.npy'
        np.save(npy_path, arr)
        return get_ephys_reader(npy_path, sample_rate=sample_rate)

    if kind == 'flat':
        # Single raw binary file.
        bin_path = tempdir / 'data.bin'
        with open(bin_path, 'wb') as fh:
            arr.tofile(fh)
        return get_ephys_reader(
            bin_path, sample_rate=sample_rate, dtype=arr.dtype, n_channels=arr.shape[1])

    if kind == 'flat_concat':
        # Two raw binary files, each holding half of the samples.
        half = arr.shape[0] // 2
        bin_paths = []
        for i, part in enumerate((arr[:half, :], arr[half:, :])):
            p = tempdir / ('data%d.bin' % i)
            with open(p, 'wb') as fh:
                part.tofile(fh)
            bin_paths.append(p)
        return get_ephys_reader(
            bin_paths, sample_rate=sample_rate, dtype=arr.dtype, n_channels=arr.shape[1])

    if kind in ('mtscomp', 'mtscomp_reader'):
        # mtscomp-compressed file: either wrap the Reader object directly
        # ('mtscomp') or reopen from the .cbin path ('mtscomp_reader').
        bin_path = tempdir / 'data.bin'
        with open(bin_path, 'wb') as fh:
            arr.tofile(fh)
        compressed_path = tempdir / 'data.cbin'
        meta_path = tempdir / 'data.ch'
        mtscomp.compress(
            bin_path, compressed_path, meta_path,
            sample_rate=sample_rate, n_channels=arr.shape[1], dtype=arr.dtype,
            n_threads=1, check_after_compress=False, quiet=True)
        reader = mtscomp.decompress(
            compressed_path, meta_path, check_after_decompress=False, quiet=True)
        source = reader if kind == 'mtscomp' else compressed_path
        return get_ephys_reader(source)
Example #4
0
def test_3d(path):
    """Round-trip a 3D array through compress/decompress, checking that an int64
    sample rate is accepted and that the original array shape is preserved in
    the metadata."""
    file_npy = path.parent.joinpath('titi.npy')
    file_cnpy = path.parent.joinpath('titi.cnpy')
    data = np.random.randint(-5000, high=5000, size=(100, 120, 130), dtype=np.int16)
    np.save(file_npy, data)
    # Two-way trip makes sure that:
    # 1) the sample_rate fed as an int64 doesn't error
    # 2) the initial shape of the array is saved in the meta-data
    mtscomp_mod.compress(
        file_npy,
        out=file_cnpy,
        outmeta=file_cnpy.with_suffix('.ch'),
        # np.prod returns an int64 here; it should be cast to float internally.
        sample_rate=np.prod(data.shape[1:]),
        dtype=data.dtype,
        do_time_diff=False)
    reader = mtscomp_mod.decompress(file_cnpy, cmeta=file_cnpy.with_suffix('.ch'))
    assert np.all(np.isclose(reader[:, :].reshape(reader.cmeta.shape), data))
Example #5
0
def test_ephys_traces_2(tempdir):
    """End-to-end check of get_ephys_traces() on an mtscomp-compressed file:
    type, shape, chunking, chunk lookup, waveform extraction, time subsetting."""
    arr = (50 * np.random.randn(1000, 10)).astype(np.int16)
    fs = 100
    raw_path = tempdir / 'data.bin'

    with open(raw_path, 'wb') as fh:
        arr.tofile(fh)

    compressed_path = raw_path.parent / 'data.cbin'
    meta_path = raw_path.parent / 'data.ch'
    mtscomp.compress(
        raw_path, compressed_path, meta_path,
        sample_rate=fs, n_channels=arr.shape[1], dtype=arr.dtype,
        n_threads=1, check_after_compress=False, quiet=True)
    reader = mtscomp.decompress(
        compressed_path, meta_path, check_after_decompress=False, quiet=True)

    traces = get_ephys_traces(reader)

    # The result is both an EphysTraces and a dask array.
    assert isinstance(traces, EphysTraces)
    assert isinstance(traces, da.Array)

    assert traces.dtype == arr.dtype
    assert traces.shape == arr.shape
    # Ten 100-sample chunks along time, one 10-channel block.
    assert traces.chunks == ((100,) * 10, (10,))

    assert bool(np.all(arr == traces).compute()) is True
    assert traces.chunk_bounds == reader.chunk_bounds

    # Map spike times (in samples) to their chunk indices.
    spike_times = [5, 50, 100, 901]
    spike_chunks = traces._get_time_chunks(spike_times)
    ae(spike_chunks, [0, 0, 1, 9])

    # Extract waveforms around each spike on 3 channels.
    waveforms = traces.extract_waveforms(spike_times, [1, 4, 7], 10)
    assert waveforms.shape == (4, 10, 3)

    # 2.5-7.5 s at 100 Hz corresponds to samples 250-750.
    sub = traces.subset_time_range(2.5, 7.5)
    assert sub.shape == (500, 10)
    assert bool(np.all(traces[250:750, :] == sub).compute()) is True
Example #6
0
def mtscomp_perf(**kwargs):
    """Benchmark mtscomp on the dataset described by the `ds` keyword argument,
    a (name, n_channels, sample_rate, duration) tuple.

    Returns a dict with compression write time, compressed/uncompressed read
    times, and the space-saving ratio in percent.

    NOTE(review): relies on a `dtype` defined outside this function (module
    level, presumably) — confirm against the surrounding file.
    """
    ds = kwargs.pop('ds', None)
    assert ds

    name, n_channels, sample_rate, duration = ds

    path = Path('data/' + name)
    compressed_path = path.parent / 'data.cbin'
    meta_path = path.parent / 'data.ch'

    # Time the compression (write path).
    start = time.perf_counter()
    compress(
        path, compressed_path, meta_path,
        sample_rate=sample_rate, n_channels=n_channels, dtype=dtype,
        check_after_compress=False, **kwargs)
    write_time = time.perf_counter() - start

    # Time decompression back to disk (reading the compressed file).
    decomp_path = path.with_suffix('.decomp.bin')
    start = time.perf_counter()
    decompress(compressed_path, meta_path, decomp_path, check_after_decompress=False)
    read_time_compressed = time.perf_counter() - start

    # Time a plain read of the uncompressed file, for comparison.
    start = time.perf_counter()
    raw = load_raw_data(path, n_channels=n_channels, dtype=dtype, mmap=False)
    assert raw.size
    read_time_decompressed = time.perf_counter() - start

    orig_size = path.stat().st_size
    compressed_size = compressed_path.stat().st_size

    return {
        'read_time_compressed': read_time_compressed,
        'read_time_decompressed': read_time_decompressed,
        'write_time': write_time,
        'ratio': 100 - 100 * compressed_size / orig_size,
    }
Example #7
0
def test_decompress_pool(path, arr):
    """Check decompress_chunks() with a shared thread pool and a small LRU cache
    (cache_size=2, smaller than the number of requested chunks)."""
    _write_arr(path, arr)
    compressed_path = path.parent / 'data.cbin'
    meta_path = path.parent / 'data.ch'
    compress(
        path, compressed_path, meta_path, sample_rate=sample_rate,
        n_channels=arr.shape[1], dtype=arr.dtype, check_after_compress=False)
    reader = decompress(compressed_path, meta_path, cache_size=2)

    requests = ([0, 1, 2], [1, 2, 3], [0, 1, 3])
    pool = reader.start_thread_pool()
    results = [reader.decompress_chunks(chunks, pool=pool) for chunks in requests]
    reader.stop_thread_pool()

    # Each call returns a dict keyed by exactly the requested chunk indices.
    for requested, result in zip(requests, results):
        assert sorted(result.keys()) == requested
Example #8
0
def test_chop(path):
    """Chop a compressed file down to its first 5 chunks and check the result is
    byte-identical to compressing the first half of the array directly."""
    arr = np.array(np.random.randint(low=0, high=255, size=(1000, 100)), dtype=np.int16)
    _write_arr(path, arr)
    compressed_path = path.parent / 'data.cbin'
    meta_path = path.parent / 'data.ch'
    compress(
        path, compressed_path, meta_path, sample_rate=100,
        n_channels=arr.shape[1], dtype=arr.dtype,
    )

    # Method 1: chop the already-compressed file with Reader.chop().
    reader = Reader()
    reader.open(compressed_path, meta_path)
    chopped_path = compressed_path.with_name('data.chopped.cbin')
    assert reader.n_chunks == 10
    reader.chop(5, chopped_path)
    reader.close()
    with open(str(chopped_path), 'rb') as fh:
        sha1_chopped = sha1(fh.read())

    # The chopped file must hold the first 5 chunks, i.e. the first 500 samples.
    reader = Reader()
    reader.open(chopped_path)
    assert reader.n_chunks == 5
    chopped_arr = reader[:]
    assert chopped_arr.dtype == arr.dtype
    assert chopped_arr.shape == (500, 100)
    assert np.all(chopped_arr == arr[:500])
    reader.close()

    # Method 2: compress the first half of the array from scratch.
    compressed_path2 = path.parent / 'data.chopped2.cbin'
    meta_path2 = path.parent / 'data.chopped2.ch'
    _write_arr(path, arr[:500])
    compress(
        path, compressed_path2, meta_path2, sample_rate=100,
        n_channels=arr.shape[1], dtype=arr.dtype,
    )

    # Both methods must produce identical compressed bytes.
    with open(str(compressed_path2), 'rb') as fh:
        sha1_chopped2 = sha1(fh.read())
    assert sha1_chopped == sha1_chopped2
Example #9
0
 def compress_file(self, keep_original=True, **kwargs):
     """
     Compress the reader's flat binary file to mtscomp *.cbin format.
     :param keep_original: defaults True. If False, the original uncompressed file is deleted
      and the current spikeglx.Reader object is modified in place
     :param kwargs: forwarded to mtscomp.compress
     :return: pathlib.Path of the compressed *.cbin file
     """
     file_out = self.file_bin.with_suffix('.cbin')
     # Refuse to compress a file that is already mtscomp-compressed.
     assert not self.is_mtscomp
     mtscomp.compress(
         self.file_bin,
         out=file_out,
         outmeta=self.file_bin.with_suffix('.ch'),
         sample_rate=self.fs,
         n_channels=self.nc,
         dtype=np.int16,
         **kwargs)
     if not keep_original:
         # Remove the original and point this reader at the compressed file.
         self.file_bin.unlink()
         self.file_bin = file_out
     return file_out