Example #1
def test_comp_decomp(path):
    """Compress and decompress a random binary file with an integer data type, and check that
    the files are byte-for-byte equal. This would not work for floating-point data types."""
    arr = np.array(np.random.randint(low=0, high=255, size=(1000, 1000)), dtype=np.int16).T
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta, sample_rate=sample_rate, n_channels=arr.shape[1], dtype=arr.dtype,
    )
    decompressed_path = path.with_suffix('.decomp.bin')
    decompress(out, outmeta, out=decompressed_path)

    # Check the files are equal.
    with open(str(path), 'rb') as f:
        buf1 = f.read()
        sha1_original = sha1(buf1)
    with open(str(decompressed_path), 'rb') as f:
        buf2 = f.read()
        sha1_decompressed = sha1(buf2)
    assert buf1 == buf2

    # Check the SHA1s.
    with open(str(out), 'rb') as f:
        sha1_compressed = sha1(f.read())
    with open(str(outmeta), 'r') as f:
        meta = json.load(f)

    assert meta['sha1_compressed'] == sha1_compressed
    assert meta['sha1_uncompressed'] == sha1_decompressed == sha1_original
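
For reference, the round trip exercised by this test boils down to two calls of the public API. A minimal sketch, assuming a flat int16 binary file with a known sample rate and channel count (file names and figures below are made up):

import numpy as np
from mtscomp import compress, decompress

# Hypothetical recording: 385 channels sampled at 30 kHz, int16 samples.
compress('data.bin', 'data.cbin', 'data.ch',
         sample_rate=30000., n_channels=385, dtype=np.int16)
reader = decompress('data.cbin', 'data.ch')  # lazy, array-like Reader
first_second = reader[:30000, :]             # chunks are decompressed on the fly
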
Example #2
 def decompress_file(self, keep_original=True, **kwargs):
     """
     Decompresses an mtscomp file
     :param keep_original: defaults to True. If False, the original compressed file is deleted
      and the current spikeglx.Reader object is modified in place
     :return: pathlib.Path of the decompressed *.bin file
     """
     file_out = self.file_bin.with_suffix('.bin')
     assert self.is_mtscomp
     mtscomp.decompress(self.file_bin, self.file_bin.with_suffix('.ch'), out=file_out, **kwargs)
     if not keep_original:
         self.file_bin.unlink()
         self.file_bin.with_suffix('.ch').unlink()
         self.file_bin = file_out
     return file_out
Example #3
def load_raw_data(path=None, n_channels_dat=None, dtype=None, offset=None, order=None):
    """Load raw data at a given path."""
    if not path:
        return
    path = Path(path)
    if not path.exists():
        logger.warning("Path %s does not exist, trying ephys.raw filename.", path)
        path = path.parent / ('ephys.raw' + path.suffix)
        if not path.exists():
            logger.warning("Error while loading data: File `%s` not found.", path)
            return None
    assert path.exists()
    logger.debug("Loading traces at `%s`.", path)
    if str(path).endswith('.cbin'):  # pragma: no cover
        try:
            from mtscomp import decompress
            logger.debug("Decompressing %s on the fly with mtscomp.", path)
            return decompress(path)
        except ImportError:
            logger.warning(
                "The mtscomp package is not available, %s cannot be decompressed. "
                "In the meantime, the raw data will not be available.", path)
            return
    dtype = dtype if dtype is not None else np.int16
    return _dat_to_traces(path, n_channels=n_channels_dat, dtype=dtype, offset=offset, order=order)
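
Note that the .cbin branch above calls decompress(path) with the compressed file only, letting mtscomp find the matching .ch metadata next to it, and returns an array-like Reader; callers can slice the result the same way whether it came from a flat file or a compressed one. A hedged usage sketch (the path is hypothetical):

traces = load_raw_data('ephys.raw.cbin')   # returns an mtscomp Reader in this case
snippet = traces[:30000, :]                # sliced like the memmapped flat-file case
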
Example #4
 def decompress_file(self, keep_original=True, **kwargs):
     """
     Decompresses an mtscomp file
     :param keep_original: defaults to True. If False, the original compressed file (input)
     is deleted and the current spikeglx.Reader object is modified in place
     NB: This is not equivalent to overwrite (which replaces the output file)
     :return: pathlib.Path of the decompressed *.bin file
     """
     if 'out' not in kwargs:
         kwargs['out'] = self.file_bin.with_suffix('.bin')
     assert self.is_mtscomp
     mtscomp.decompress(self.file_bin, self.file_bin.with_suffix('.ch'), **kwargs)
     if not keep_original:
         self.file_bin.unlink()
         self.file_bin.with_suffix('.ch').unlink()
         self.file_bin = kwargs['out']
     return kwargs['out']
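
A typical call site for this method might look like the sketch below; opening spikeglx.Reader directly on the .cbin path is an assumption about the surrounding API, and the file name is made up:

# Hypothetical usage of decompress_file().
sr = spikeglx.Reader('probe00.ap.cbin')
assert sr.is_mtscomp
bin_file = sr.decompress_file(keep_original=True)  # pathlib.Path of the *.bin
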
Example #5
    def download_raw_partial(self, url_cbin, url_ch, first_chunk=0, last_chunk=0):
        assert url_cbin.endswith('.cbin')
        assert url_ch.endswith('.ch')

        relpath = Path(url_cbin.replace(self._par.HTTP_DATA_SERVER, '.')).parents[0]
        target_dir = Path(self._get_cache_dir(None), relpath)
        Path(target_dir).mkdir(parents=True, exist_ok=True)

        # First, download the .ch file.
        ch_local_path = Path(wc.http_download_file(
            url_ch,
            username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD,
            cache_dir=target_dir, clobber=True, offline=False, return_md5=False))
        ch_local_path = remove_uuid_file(ch_local_path)
        ch_local_path = ch_local_path.rename(ch_local_path.with_suffix('.chopped.ch'))
        assert ch_local_path.exists()

        # Load the .ch file.
        with open(ch_local_path, 'r') as f:
            cmeta = json.load(f)

        # Get the first byte and number of bytes to download.
        i0 = cmeta['chunk_bounds'][first_chunk]
        cmeta['chunk_bounds'] = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
        cmeta['chunk_bounds'] = [_ - i0 for _ in cmeta['chunk_bounds']]
        assert len(cmeta['chunk_bounds']) >= 2
        assert cmeta['chunk_bounds'][0] == 0

        first_byte = cmeta['chunk_offsets'][first_chunk]
        cmeta['chunk_offsets'] = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
        cmeta['chunk_offsets'] = [_ - first_byte for _ in cmeta['chunk_offsets']]
        assert len(cmeta['chunk_offsets']) >= 2
        assert cmeta['chunk_offsets'][0] == 0
        n_bytes = cmeta['chunk_offsets'][-1]
        assert n_bytes > 0

        # Save the chopped chunk bounds and offsets.
        cmeta['sha1_compressed'] = None
        cmeta['sha1_uncompressed'] = None
        cmeta['chopped'] = True
        with open(ch_local_path, 'w') as f:
            json.dump(cmeta, f, indent=2, sort_keys=True)

        # Download the requested chunks
        cbin_local_path = wc.http_download_file(
            url_cbin,
            username=self._par.HTTP_DATA_SERVER_LOGIN,
            password=self._par.HTTP_DATA_SERVER_PWD,
            cache_dir=target_dir, clobber=True, offline=False, return_md5=False,
            chunks=(first_byte, n_bytes))
        cbin_local_path = remove_uuid_file(cbin_local_path)
        cbin_local_path = cbin_local_path.rename(cbin_local_path.with_suffix('.chopped.cbin'))
        assert cbin_local_path.exists()

        import mtscomp
        reader = mtscomp.decompress(cbin_local_path, cmeta=ch_local_path)
        return reader[:]
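
The chopping arithmetic above is easier to follow with concrete numbers; the sketch below uses made-up offsets to show how the byte range requested from the server is derived:

# Hypothetical byte offsets of the compressed chunks within the .cbin file.
chunk_offsets = [0, 1000, 2100, 3000, 4200]
first_chunk, last_chunk = 1, 2
first_byte = chunk_offsets[first_chunk]   # 1000
chopped = [o - first_byte
           for o in chunk_offsets[first_chunk:last_chunk + 2]]
# chopped == [0, 1100, 2000]: download 2000 bytes starting at byte 1000,
# which covers chunks 1 and 2 exactly.
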
Example #6
def mtscomp_perf(**kwargs):
    ds = kwargs.pop('ds', None)
    assert ds

    name, n_channels, sample_rate, duration = ds

    # Compress the file.
    path = Path('data/' + name)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    t0 = time.perf_counter()
    compress(path,
             out,
             outmeta,
             sample_rate=sample_rate,
             n_channels=n_channels,
             dtype=dtype,
             check_after_compress=False,
             **kwargs)
    t1 = time.perf_counter()
    wt = t1 - t0

    # Decompress the file and write it to disk.
    out2 = path.with_suffix('.decomp.bin')
    t0 = time.perf_counter()
    decompress(out, outmeta, out2, check_after_decompress=False)
    t1 = time.perf_counter()
    rtc = t1 - t0

    # Read the uncompressed file.
    t0 = time.perf_counter()
    x = load_raw_data(path, n_channels=n_channels, dtype=dtype, mmap=False)
    assert x.size
    t1 = time.perf_counter()
    rtdec = t1 - t0

    orig_size = path.stat().st_size
    compressed_size = out.stat().st_size

    return {
        'read_time_compressed': rtc,
        'read_time_decompressed': rtdec,
        'write_time': wt,
        'ratio': 100 - 100 * compressed_size / orig_size,
    }
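
The benchmark expects the dataset as a (name, n_channels, sample_rate, duration) tuple and assumes dtype is defined at module level; a hedged way to drive it (file name and figures are made up) would be:

# Hypothetical 385-channel, 30 kHz, 10-minute dataset stored in data/imec0.ap.bin.
results = mtscomp_perf(ds=('imec0.ap.bin', 385, 30000., 600.))
print(results['write_time'], results['ratio'])
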
Example #7
def _round_trip(path, arr, **ckwargs):
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta, sample_rate=sample_rate, n_channels=arr.shape[1],
        dtype=arr.dtype, **ckwargs)
    unc = decompress(out, outmeta)
    assert np.allclose(unc[:], arr)
    return unc
Example #8
def traces(request, tempdir, arr, sample_rate):
    if request.param == 'numpy':
        return get_ephys_reader(arr, sample_rate=sample_rate)

    elif request.param == 'npy':
        path = tempdir / 'data.npy'
        np.save(path, arr)
        return get_ephys_reader(path, sample_rate=sample_rate)

    elif request.param == 'flat':
        path = tempdir / 'data.bin'
        with open(path, 'wb') as f:
            arr.tofile(f)
        return get_ephys_reader(path,
                                sample_rate=sample_rate,
                                dtype=arr.dtype,
                                n_channels=arr.shape[1])

    elif request.param == 'flat_concat':
        path0 = tempdir / 'data0.bin'
        with open(path0, 'wb') as f:
            arr[:arr.shape[0] // 2, :].tofile(f)
        path1 = tempdir / 'data1.bin'
        with open(path1, 'wb') as f:
            arr[arr.shape[0] // 2:, :].tofile(f)
        return get_ephys_reader([path0, path1],
                                sample_rate=sample_rate,
                                dtype=arr.dtype,
                                n_channels=arr.shape[1])

    elif request.param in ('mtscomp', 'mtscomp_reader'):
        path = tempdir / 'data.bin'
        with open(path, 'wb') as f:
            arr.tofile(f)
        out = tempdir / 'data.cbin'
        outmeta = tempdir / 'data.ch'
        mtscomp.compress(path,
                         out,
                         outmeta,
                         sample_rate=sample_rate,
                         n_channels=arr.shape[1],
                         dtype=arr.dtype,
                         n_threads=1,
                         check_after_compress=False,
                         quiet=True)
        reader = mtscomp.decompress(out,
                                    outmeta,
                                    check_after_decompress=False,
                                    quiet=True)
        if request.param == 'mtscomp':
            return get_ephys_reader(reader)
        else:
            return get_ephys_reader(out)
Example #9
def test_3d(path):
    file_npy = path.parent.joinpath('titi.npy')
    file_cnpy = path.parent.joinpath('titi.cnpy')
    array = np.random.randint(-5000, high=5000, size=(100, 120, 130), dtype=np.int16)
    np.save(file_npy, array)
    # round trip - makes sure that:
    # 1) a sample_rate fed as an int64 doesn't error
    # 2) the initial shape of the array is saved in the metadata
    mtscomp_mod.compress(file_npy,
                         out=file_cnpy,
                         outmeta=file_cnpy.with_suffix('.ch'),
                         sample_rate=np.prod(array.shape[1:]),  # int64 on purpose; compress should cast it to float
                         dtype=array.dtype,
                         do_time_diff=False)
    d = mtscomp_mod.decompress(file_cnpy, cmeta=file_cnpy.with_suffix('.ch'))
    assert np.all(np.isclose(d[:, :].reshape(d.cmeta.shape), array))
Example #10
def test_ephys_traces_2(tempdir):
    data = (50 * np.random.randn(1000, 10)).astype(np.int16)
    sample_rate = 100
    path = tempdir / 'data.bin'

    with open(path, 'wb') as f:
        data.tofile(f)

    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    mtscomp.compress(path,
                     out,
                     outmeta,
                     sample_rate=sample_rate,
                     n_channels=data.shape[1],
                     dtype=data.dtype,
                     n_threads=1,
                     check_after_compress=False,
                     quiet=True)
    reader = mtscomp.decompress(out,
                                outmeta,
                                check_after_decompress=False,
                                quiet=True)

    traces = get_ephys_traces(reader)

    assert isinstance(traces, EphysTraces)
    assert isinstance(traces, da.Array)

    assert traces.dtype == data.dtype
    assert traces.shape == data.shape
    assert traces.chunks == ((100, ) * 10, (10, ))

    assert bool(np.all(data == traces).compute()) is True
    assert traces.chunk_bounds == reader.chunk_bounds

    spike_times = [5, 50, 100, 901]
    spike_chunks = traces._get_time_chunks(spike_times)
    ae(spike_chunks, [0, 0, 1, 9])

    waveforms = traces.extract_waveforms(spike_times, [1, 4, 7], 10)
    assert waveforms.shape == (4, 10, 3)

    traces_sub = traces.subset_time_range(2.5, 7.5)
    assert traces_sub.shape == (500, 10)
    assert bool(np.all(traces[250:750, :] == traces_sub).compute()) is True
Example #11
def test_decompress_pool(path, arr):
    _write_arr(path, arr)
    out = path.parent / 'data.cbin'
    outmeta = path.parent / 'data.ch'
    compress(
        path, out, outmeta, sample_rate=sample_rate, n_channels=arr.shape[1], dtype=arr.dtype,
        check_after_compress=False)
    reader = decompress(out, outmeta, cache_size=2)

    pool = reader.start_thread_pool()
    d1 = reader.decompress_chunks([0, 1, 2], pool=pool)
    d2 = reader.decompress_chunks([1, 2, 3], pool=pool)
    d3 = reader.decompress_chunks([0, 1, 3], pool=pool)
    reader.stop_thread_pool()

    assert sorted(d1.keys()) == [0, 1, 2]
    assert sorted(d2.keys()) == [1, 2, 3]
    assert sorted(d3.keys()) == [0, 1, 3]
Example #12
    def _run(self, overwrite=False):

        efiles = spikeglx.glob_ephys_files(self.session_path)

        apfiles = [(ef.get('ap'), ef.get('label')) for ef in efiles
                   if 'ap' in ef.keys()]
        for ap_file, label in apfiles:
            # check for pre-existing spike-sorting
            # the spike sorting output can either be with the probe (<1.5.5) or in the
            # session_path/spike_sorters/ks2_matlab/probeXX folder
            ks2_dir = self.session_path.joinpath('spike_sorters', 'ks2_matlab',
                                                 label)
            if ap_file.parent.joinpath('spike_sorting_ks2.log').exists():
                _logger.info(
                    f'Already ran: spike_sorting_ks2.log found for {ap_file}, skipping.'
                )
                continue  # this will label the job with ok status in the database
            if ks2_dir.joinpath('spike_sorting_ks2.log').exists():
                _logger.info(
                    f'Already ran: spike_sorting_ks2.log found in {ks2_dir}, skipping.'
                )
                continue
            # get the scratch drive from the shell script
            SHELL_SCRIPT = Path.home().joinpath(
                "Documents/PYTHON/iblscripts/deploy/serverpc/kilosort2/task_ks2_matlab.sh"
            )
            with open(SHELL_SCRIPT) as fid:
                lines = fid.readlines()
            line = [
                line for line in lines if line.startswith('SCRATCH_DRIVE=')
            ][0]
            m = re.search(r"\=(.*?)(\#|\n)", line)[0]
            scratch_drive = Path(m[1:-1].strip())
            assert (scratch_drive.exists())

            # clean up and create directory, this also checks write permissions
            # scratch dir has the following shape: ks2m/ZM_3003_2020-07-29_001_probe00
            # first makes sure the tmp dir is clean
            shutil.rmtree(scratch_drive.joinpath('ks2m'), ignore_errors=True)
            scratch_dir = scratch_drive.joinpath(
                'ks2m', '_'.join(list(self.session_path.parts[-3:]) + [label]))
            if scratch_dir.exists():
                shutil.rmtree(scratch_dir, ignore_errors=True)
            scratch_dir.mkdir(parents=True, exist_ok=True)

            # decompresses using mtscomp
            tmp_ap_file = scratch_dir.joinpath(
                ap_file.name).with_suffix('.bin')
            mtscomp.decompress(cdata=ap_file, out=tmp_ap_file)

            # run matlab spike sorting: with R2019a, it would be much easier to run with
            # -batch option as matlab errors are redirected to stderr automatically
            command2run = f"{SHELL_SCRIPT} {scratch_dir}"
            _logger.info(command2run)
            process = subprocess.Popen(command2run,
                                       shell=True,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       executable="/bin/bash")
            info, error = process.communicate()
            info_str = info.decode('utf-8').strip()
            if process.returncode != 0:
                raise RuntimeError(error.decode('utf-8'))
            elif 'run_ks2_ibl.m failed' in info_str:
                # log the full ks2 output before raising
                _logger.info(info_str)
                raise RuntimeError('Matlab error: ks2 log above.')

            # clean up and copy: output to session/spike_sorters/ks2_matlab/probeXX (ks2_dir)
            tmp_ap_file.unlink()  # remove the uncompressed temp binary file
            scratch_dir.joinpath('temp_wh.dat').unlink()  # remove the memmapped pre-processed file
            shutil.move(scratch_dir, ks2_dir)

            self.version = self._fetch_ks2_commit_hash()
        return []  # the job will be labeled as complete with empty string
Example #13
    def _run_ks2(self, ap_file):
        """
        Runs the ks2 matlab spike sorting for one probe dataset
        the spike sorting output can either be with the probe (<1.5.5) or in the
        session_path/spike_sorters/ks2_matlab/probeXX folder
        :return: path of the folder containing ks2 spike sorting output
        """
        label = ap_file.parts[-2]
        if ap_file.parent.joinpath("spike_sorting_ks2.log").exists():
            _logger.info(
                f"Already ran: spike_sorting_ks2.log found for {ap_file}, skipping."
            )
            return ap_file.parent
        ks2_dir = self.session_path.joinpath("spike_sorters", "ks2_matlab",
                                             label)
        if ks2_dir.joinpath("spike_sorting_ks2.log").exists():
            _logger.info(
                f"Already ran: spike_sorting_ks2.log found in {ks2_dir}, skipping."
            )
            return ks2_dir
        # get the scratch drive from the shell script
        SHELL_SCRIPT = Path.home().joinpath(
            "Documents/PYTHON/iblscripts/deploy/serverpc/kilosort2/task_ks2_matlab.sh"
        )
        with open(SHELL_SCRIPT) as fid:
            lines = fid.readlines()
        line = [line for line in lines if line.startswith("SCRATCH_DRIVE=")][0]
        m = re.search(r"\=(.*?)(\#|\n)", line)[0]
        scratch_drive = Path(m[1:-1].strip())
        assert scratch_drive.exists()

        # clean up and create directory, this also checks write permissions
        # scratch dir has the following shape: ks2m/ZM_3003_2020-07-29_001_probe00
        # first makes sure the tmp dir is clean
        shutil.rmtree(scratch_drive.joinpath("ks2m"), ignore_errors=True)
        scratch_dir = scratch_drive.joinpath(
            "ks2m", "_".join(list(self.session_path.parts[-3:]) + [label]))
        if scratch_dir.exists():
            shutil.rmtree(scratch_dir, ignore_errors=True)
        scratch_dir.mkdir(parents=True, exist_ok=True)

        # decompresses using mtscomp
        tmp_ap_file = scratch_dir.joinpath(ap_file.name).with_suffix(".bin")
        mtscomp.decompress(cdata=ap_file, out=tmp_ap_file)

        # run matlab spike sorting: with R2019a, it would be much easier to run with
        # -batch option as matlab errors are redirected to stderr automatically
        command2run = f"{SHELL_SCRIPT} {scratch_dir}"
        _logger.info(command2run)
        process = subprocess.Popen(
            command2run,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            executable="/bin/bash",
        )
        info, error = process.communicate()
        info_str = info.decode("utf-8").strip()
        if process.returncode != 0:
            raise RuntimeError(error.decode("utf-8"))
        elif "run_ks2_ibl.m failed" in info_str:
            # log the full ks2 output before raising
            _logger.info(info_str)
            raise RuntimeError("Matlab error: ks2 log above.")

        # clean up and copy: output to session/spike_sorters/ks2_matlab/probeXX (ks2_dir)
        tmp_ap_file.unlink()  # remove the uncompressed temp binary file
        scratch_dir.joinpath("temp_wh.dat").unlink()  # remove the memmapped pre-processed file
        shutil.move(scratch_dir, ks2_dir)

        self.version = self._fetch_ks2_commit_hash()
        return ks2_dir