def test_comp_decomp(path):
    """Compress and decompress a random binary file with an integer data type, and check
    that the original and decompressed files are byte-for-byte equal. This would not work
    for floating-point data types."""
    arr = np.array(np.random.randint(low=0, high=255, size=(1000, 1000)), dtype=np.int16).T
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta,
        sample_rate=sample_rate, n_channels=arr.shape[1], dtype=arr.dtype,
    )
    decompressed_path = path.with_suffix('.decomp.bin')
    decompress(out, outmeta, out=decompressed_path)

    # Check that the files are equal.
    with open(str(path), 'rb') as f:
        buf1 = f.read()
        sha1_original = sha1(buf1)
    with open(str(decompressed_path), 'rb') as f:
        buf2 = f.read()
        sha1_decompressed = sha1(buf2)
    assert buf1 == buf2

    # Check the SHA1s stored in the metadata file.
    with open(str(out), 'rb') as f:
        sha1_compressed = sha1(f.read())
    with open(str(outmeta), 'r') as f:
        meta = json.load(f)
    assert meta['sha1_compressed'] == sha1_compressed
    assert meta['sha1_uncompressed'] == sha1_decompressed == sha1_original

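# The tests in this file call two small helpers that are not shown in this
# excerpt. Minimal sketches of what they presumably look like, judging from
# the call sites (`_write_arr(path, arr)` dumps an array to a flat binary
# file; `sha1(buf)` hashes a bytes buffer):

import hashlib

def _write_arr(path, arr):
    """Write a NumPy array to disk as a raw binary file."""
    with open(str(path), 'wb') as f:
        arr.tofile(f)

def sha1(buf):
    """Return the hex SHA1 digest of a bytes buffer."""
    return hashlib.sha1(buf).hexdigest()
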
def _round_trip(path, arr, **ckwargs):
    """Compress an array to disk, decompress it, check that the round trip is
    lossless, and return the decompressed reader."""
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta,
        sample_rate=sample_rate, n_channels=arr.shape[1], dtype=arr.dtype,
        **ckwargs)
    unc = decompress(out, outmeta)
    assert np.allclose(unc[:], arr)
    return unc

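# A short usage sketch of the helper above (hypothetical, not part of the
# original suite): extra kwargs are forwarded verbatim to compress(), so the
# round trip can be checked under non-default settings, e.g. a 2-second chunk
# length via mtscomp's `chunk_duration` parameter.
def test_round_trip_chunk_duration(path, arr):
    _round_trip(path, arr, chunk_duration=2.)
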
# Parametrized fixture: each test that uses `traces` runs once per backend.
@pytest.fixture(params=['numpy', 'npy', 'flat', 'flat_concat', 'mtscomp', 'mtscomp_reader'])
def traces(request, tempdir, arr, sample_rate):
    if request.param == 'numpy':
        return get_ephys_reader(arr, sample_rate=sample_rate)
    elif request.param == 'npy':
        path = tempdir / 'data.npy'
        np.save(path, arr)
        return get_ephys_reader(path, sample_rate=sample_rate)
    elif request.param == 'flat':
        path = tempdir / 'data.bin'
        with open(path, 'wb') as f:
            arr.tofile(f)
        return get_ephys_reader(
            path, sample_rate=sample_rate, dtype=arr.dtype, n_channels=arr.shape[1])
    elif request.param == 'flat_concat':
        # The same data split across two consecutive flat binary files.
        path0 = tempdir / 'data0.bin'
        with open(path0, 'wb') as f:
            arr[:arr.shape[0] // 2, :].tofile(f)
        path1 = tempdir / 'data1.bin'
        with open(path1, 'wb') as f:
            arr[arr.shape[0] // 2:, :].tofile(f)
        return get_ephys_reader(
            [path0, path1], sample_rate=sample_rate, dtype=arr.dtype,
            n_channels=arr.shape[1])
    elif request.param in ('mtscomp', 'mtscomp_reader'):
        path = tempdir / 'data.bin'
        with open(path, 'wb') as f:
            arr.tofile(f)
        out = tempdir / 'data.cbin'
        outmeta = tempdir / 'data.ch'
        mtscomp.compress(
            path, out, outmeta, sample_rate=sample_rate, n_channels=arr.shape[1],
            dtype=arr.dtype, n_threads=1, check_after_compress=False, quiet=True)
        reader = mtscomp.decompress(out, outmeta, check_after_decompress=False, quiet=True)
        if request.param == 'mtscomp':
            return get_ephys_reader(reader)
        else:
            return get_ephys_reader(out)

def test_3d(path):
    file_npy = path.parent.joinpath('titi.npy')
    file_cnpy = path.parent.joinpath('titi.cnpy')
    array = np.random.randint(-5000, high=5000, size=(100, 120, 130), dtype=np.int16)
    np.save(file_npy, array)
    # Two-way trip; makes sure that:
    # 1) a sample_rate fed as an int64 doesn't error;
    # 2) the initial shape of the array is saved in the metadata.
    mtscomp_mod.compress(
        file_npy, out=file_cnpy, outmeta=file_cnpy.with_suffix('.ch'),
        sample_rate=np.prod(array.shape[1:]),  # an np.int64; must be accepted and cast to float
        dtype=array.dtype, do_time_diff=False)
    d = mtscomp_mod.decompress(file_cnpy, cmeta=file_cnpy.with_suffix('.ch'))
    assert np.all(np.isclose(d[:, :].reshape(d.cmeta.shape), array))

def test_ephys_traces_2(tempdir):
    data = (50 * np.random.randn(1000, 10)).astype(np.int16)
    sample_rate = 100
    path = tempdir / 'data.bin'
    with open(path, 'wb') as f:
        data.tofile(f)

    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    mtscomp.compress(
        path, out, outmeta, sample_rate=sample_rate, n_channels=data.shape[1],
        dtype=data.dtype, n_threads=1, check_after_compress=False, quiet=True)
    reader = mtscomp.decompress(out, outmeta, check_after_decompress=False, quiet=True)

    traces = get_ephys_traces(reader)
    assert isinstance(traces, EphysTraces)
    assert isinstance(traces, da.Array)
    assert traces.dtype == data.dtype
    assert traces.shape == data.shape
    assert traces.chunks == ((100,) * 10, (10,))
    assert bool(np.all(data == traces).compute()) is True
    assert traces.chunk_bounds == reader.chunk_bounds

    spike_times = [5, 50, 100, 901]
    spike_chunks = traces._get_time_chunks(spike_times)
    ae(spike_chunks, [0, 0, 1, 9])

    waveforms = traces.extract_waveforms(spike_times, [1, 4, 7], 10)
    assert waveforms.shape == (4, 10, 3)

    traces_sub = traces.subset_time_range(2.5, 7.5)
    assert traces_sub.shape == (500, 10)
    assert bool(np.all(traces[250:750, :] == traces_sub).compute()) is True

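# Why `traces.chunks == ((100,) * 10, (10,))` holds above: mtscomp compresses
# in 1-second chunks by default, so 1000 samples at 100 Hz split into 10
# chunks of 100 samples, and the dask array mirrors the reader's chunk bounds
# along the time axis while keeping all 10 channels in a single chunk. The
# arithmetic, as a self-contained check:
#
#     chunk_bounds = list(range(0, 1001, 100))     # [0, 100, ..., 1000]
#     assert tuple(np.diff(chunk_bounds)) == (100,) * 10
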
def mtscomp_perf(**kwargs):
    ds = kwargs.pop('ds', None)
    assert ds
    name, n_channels, sample_rate, duration = ds

    # Compress the file.
    path = Path('data/' + name)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    t0 = time.perf_counter()
    compress(
        path, out, outmeta, sample_rate=sample_rate, n_channels=n_channels,
        dtype=dtype, check_after_compress=False, **kwargs)
    t1 = time.perf_counter()
    wt = t1 - t0

    # Decompress the file and write it to disk.
    out2 = path.with_suffix('.decomp.bin')
    t0 = time.perf_counter()
    decompress(out, outmeta, out2, check_after_decompress=False)
    t1 = time.perf_counter()
    rtc = t1 - t0

    # Read the uncompressed file.
    t0 = time.perf_counter()
    x = load_raw_data(path, n_channels=n_channels, dtype=dtype, mmap=False)
    assert x.size
    t1 = time.perf_counter()
    rtdec = t1 - t0

    orig_size = path.stat().st_size
    compressed_size = out.stat().st_size
    return {
        'read_time_compressed': rtc,
        'read_time_decompressed': rtdec,
        'write_time': wt,
        'ratio': 100 - 100 * compressed_size / orig_size,
    }

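# A hypothetical invocation of the benchmark above. The dataset tuple is
# (name, n_channels, sample_rate, duration), and `dtype` is assumed to be a
# module-level global; the file name and figures here are illustrative only:
#
#     results = mtscomp_perf(ds=('raw.bin', 385, 30000., 600.), n_threads=8)
#     print('compression ratio: %.1f%%' % results['ratio'])
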
def test_decompress_pool(path, arr):
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta,
        sample_rate=sample_rate, n_channels=arr.shape[1], dtype=arr.dtype,
        check_after_compress=False)

    reader = decompress(out, outmeta, cache_size=2)
    pool = reader.start_thread_pool()
    d1 = reader.decompress_chunks([0, 1, 2], pool=pool)
    d2 = reader.decompress_chunks([1, 2, 3], pool=pool)
    d3 = reader.decompress_chunks([0, 1, 3], pool=pool)
    reader.stop_thread_pool()

    assert sorted(d1.keys()) == [0, 1, 2]
    assert sorted(d2.keys()) == [1, 2, 3]
    assert sorted(d3.keys()) == [0, 1, 3]

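# Note on the pattern above: decompress_chunks() returns a dict mapping chunk
# index to that chunk's decompressed array (hence the keys() assertions), and
# with cache_size=2 a three-chunk request can never be served entirely from
# the reader's chunk cache, so each call presumably exercises both cached and
# freshly decompressed chunks through the thread pool.
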
def test_chop(path):
    arr = np.array(np.random.randint(low=0, high=255, size=(1000, 100)), dtype=np.int16)
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta,
        sample_rate=100, n_channels=arr.shape[1], dtype=arr.dtype,
    )

    # Chop with method #1.
    r = Reader()
    r.open(out, outmeta)
    out_chopped = out.with_name('data.chopped.cbin')
    assert r.n_chunks == 10
    r.chop(5, out_chopped)
    r.close()
    with open(str(out_chopped), 'rb') as f:
        sha1_chopped = sha1(f.read())

    # Check chopped file.
    r = Reader()
    r.open(out_chopped)
    assert r.n_chunks == 5
    arr_chopped = r[:]
    assert arr_chopped.dtype == arr.dtype
    assert arr_chopped.shape == (500, 100)
    assert np.all(arr_chopped == arr[:500])
    r.close()

    # Chop with method #2.
    out_chopped2 = path.parent / 'data.chopped2.cbin'
    outmeta_chopped2 = path.parent / 'data.chopped2.ch'
    _write_arr(path, arr[:500])
    compress(
        path, out_chopped2, outmeta_chopped2,
        sample_rate=100, n_channels=arr.shape[1], dtype=arr.dtype,
    )

    # Check that the chopped file is identical with both methods.
    with open(str(out_chopped2), 'rb') as f:
        sha1_chopped2 = sha1(f.read())
    assert sha1_chopped == sha1_chopped2

def compress_file(self, keep_original=True, **kwargs):
    """Compress the current .bin file to a .cbin file using mtscomp.

    :param keep_original: defaults to True. If False, the original uncompressed file is
     deleted and the current spikeglx.Reader object is modified in place.
    :param kwargs: extra keyword arguments passed to mtscomp.compress.
    :return: pathlib.Path of the compressed *.cbin file.
    """
    file_out = self.file_bin.with_suffix('.cbin')
    assert not self.is_mtscomp
    mtscomp.compress(self.file_bin, out=file_out, outmeta=self.file_bin.with_suffix('.ch'),
                     sample_rate=self.fs, n_channels=self.nc, dtype=np.int16, **kwargs)
    if not keep_original:
        self.file_bin.unlink()
        self.file_bin = file_out
    return file_out

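# A hypothetical usage sketch of the method above (assuming `sr` is a
# spikeglx.Reader opened on an uncompressed .bin file; extra kwargs are
# forwarded to mtscomp.compress):
#
#     sr = spikeglx.Reader('path/to/data.bin')
#     cbin = sr.compress_file(keep_original=True, check_after_compress=False)
#     # -> Path('path/to/data.cbin'), with a 'data.ch' metadata file alongside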