Example #1
    def compress_NP24(self, overwrite=False, **kwargs):
        """
        Compress the per-shank spikeglx ap and lf bin files with mtscomp.

        :param overwrite: if True, remove any pre-existing .cbin file before compressing
        :param kwargs: extra keyword arguments passed to spikeglx.Reader.compress_file
        :return:
        """
        for sh in self.shank_info.keys():
            bin_file = self.shank_info[sh]['ap_file']
            if overwrite:
                cbin_file = bin_file.with_suffix('.cbin')
                if cbin_file.exists():
                    cbin_file.unlink()

            sr_ap = spikeglx.Reader(bin_file)
            cbin_file = sr_ap.compress_file(**kwargs)
            sr_ap.close()
            bin_file.unlink()
            self.shank_info[sh]['ap_file'] = cbin_file

            bin_file = self.shank_info[sh]['lf_file']
            if overwrite:
                cbin_file = bin_file.with_suffix('.cbin')
                if cbin_file.exists():
                    cbin_file.unlink()
            sr_lf = spikeglx.Reader(bin_file)
            cbin_file = sr_lf.compress_file(**kwargs)
            sr_lf.close()
            bin_file.unlink()
            self.shank_info[sh]['lf_file'] = cbin_file
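For context, a minimal sketch of the compress-then-swap round trip this method relies on; the file path is hypothetical:

import spikeglx

sr = spikeglx.Reader('ephysData_g0_t0.imec0.ap.bin')  # hypothetical file
cbin_file = sr.compress_file()  # writes a .cbin (and .ch index) next to the .bin
sr.close()
# the original .bin can then be unlinked and replaced by cbin_file, as above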
Example #2
    def load_data(self) -> None:
        """
        Load any locally available data.
        """
        # First sanity check
        self._ensure_required_data()

        _logger.info('Gathering data for QC')
        # Load metadata and, if locally present, bin file
        for dstype in ['ap', 'lf']:
            # We already checked that there is not more than one meta file per type
            meta_file = next(self.probe_path.rglob(f'*{dstype}.meta'), None)
            if meta_file is None:
                _logger.warning(f'No {dstype}.meta file in {self.probe_path}, skipping QC for {dstype} data.')
            else:
                self.data[f'{dstype}_meta'] = spikeglx.read_meta_data(meta_file)
                bin_file = next(meta_file.parent.glob(f'*{dstype}.*bin'), None)
                if bin_file is None:
                    # we only stream the AP data, we won't stream the full LF file...
                    if dstype == 'ap':
                        self.data[dstype] = Streamer(pid=self.pid, one=self.one, remove_cached=True)
                    else:
                        self.data[dstype] = None
                else:
                    self.data[dstype] = spikeglx.Reader(bin_file, open=True)
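As a standalone sketch of the same meta-file lookup, with made-up paths:

from pathlib import Path
import spikeglx

probe_path = Path('/data/subject/2021-01-01/001/raw_ephys_data/probe00')  # hypothetical
meta_file = next(probe_path.rglob('*ap.meta'), None)
if meta_file is not None:
    md = spikeglx.read_meta_data(meta_file)  # parsed metadata for the ap band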
Example #3
    def check_NP24(self):
        """
        Check that the NP2.4 split into shanks completed correctly: reconstructs the original
        file from the individual shank files and compares the two, chunk by chunk.

        :return:
        """
        for sh in self.shank_info.keys():
            self.shank_info[sh]['sr'] = spikeglx.Reader(self.shank_info[sh]['ap_file'])

        wg = WindowGenerator(self.nsamples, self.samples_window, 0)
        for first, last in wg.firstlast:
            expected = self.sr[first:last, :]
            chunk = np.zeros_like(expected)
            for ish, sh in enumerate(self.shank_info.keys()):
                if ish == 0:
                    # the first shank contributes all of its channels, including the sync channel
                    chunk[:, self.shank_info[sh]['chns']] = self.shank_info[sh]['sr'][first:last, :]
                else:
                    # the sync channel (last column) is shared across shank files, so skip it here
                    chunk[:, self.shank_info[sh]['chns'][:-1]] = \
                        self.shank_info[sh]['sr'][first:last, :-1]
            assert np.array_equal(expected, chunk), \
                'data in original file and split files do not match'

        # close the sglx instances once we are done checking
        for sh in self.shank_info.keys():
            sr = self.shank_info[sh].pop('sr')
            sr.close()

        self.check_completed = True
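The chunked comparison relies on neurodsp's WindowGenerator; a minimal sketch of the iteration pattern (sizes are arbitrary):

from neurodsp.utils import WindowGenerator

ns, window = 300000, 50000  # arbitrary total sample count and window size
wg = WindowGenerator(ns, window, 0)
for first, last in wg.firstlast:
    pass  # e.g. read sr[first:last, :] and process the chunk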
Example #4
def _sync_to_alf(raw_ephys_apfile, output_path=None, save=False, parts=''):
    """
    Extracts sync.times, sync.channels and sync.polarities from binary ephys dataset

    :param raw_ephys_apfile: bin file containing ephys data or spike
    :param output_path: output directory
    :param save: bool write to disk only if True
    :param parts: string or list of strings that will be appended to the filename before extension
    :return:
    """
    # handles input argument: support ibllib.io.spikeglx.Reader, str and pathlib.Path
    if isinstance(raw_ephys_apfile, spikeglx.Reader):
        sr = raw_ephys_apfile
    else:
        raw_ephys_apfile = Path(raw_ephys_apfile)
        sr = spikeglx.Reader(raw_ephys_apfile)
    opened = sr.is_open
    if not opened:
        sr.open()
    # if no output, need a temp folder to swap for big files
    if not output_path:
        output_path = raw_ephys_apfile.parent
    file_ftcp = Path(output_path).joinpath(f'fronts_times_channel_polarity{uuid.uuid4()}.bin')

    # loop over chunks of the raw ephys file and dump detected fronts to a temp binary file
    wg = neurodsp.utils.WindowGenerator(sr.ns, int(SYNC_BATCH_SIZE_SECS * sr.fs), overlap=1)
    with open(file_ftcp, 'wb') as fid_ftcp:
        for sl in wg.slice:
            ss = sr.read_sync(sl)
            ind, fronts = neurodsp.utils.fronts(ss, axis=0)
            # store (time, channel, polarity) triplets for each detected front
            sav = np.c_[(ind[0, :] + sl.start) / sr.fs, ind[1, :], fronts.astype(np.double)]
            sav.tofile(fid_ftcp)
    # read the temp file back and delete it
    tim_chan_pol = np.fromfile(str(file_ftcp))
    tim_chan_pol = tim_chan_pol.reshape((int(tim_chan_pol.size / 3), 3))
    file_ftcp.unlink()
    sync = {
        'times': tim_chan_pol[:, 0],
        'channels': tim_chan_pol[:, 1],
        'polarities': tim_chan_pol[:, 2]
    }
    # if an already-open Reader was passed in, leave it open; otherwise close what we opened
    if not opened:
        sr.close()
    if save:
        out_files = alfio.save_object_npy(output_path, sync, 'sync',
                                          namespace='spikeglx', parts=parts)
        return Bunch(sync), out_files
    else:
        return Bunch(sync)
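The returned Bunch is a dict with attribute access; a hedged usage sketch (the file path and output directory are made up):

import spikeglx

sr = spikeglx.Reader('ephysData_g0_t0.imec0.ap.bin')  # hypothetical file
sync = _sync_to_alf(sr, output_path='/tmp/scratch', save=False)
rising = sync.times[sync.polarities == 1]  # rising-front times, in seconds
ch0 = sync.times[sync.channels == 0]  # all front times on sync channel 0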
Example #5
    def compress_NP21(self, overwrite=False):
        """
        Compress the NP2.1 spikeglx files with mtscomp. The ap file is compressed once if not
        already an mtscomp file; the lf file of each shank is compressed in turn.

        :param overwrite: if True, remove any pre-existing lf .cbin file before compressing
        :return:
        """
        for sh in self.shank_info.keys():
            if not self.sr.is_mtscomp:
                cbin_file = self.sr.compress_file()
                self.sr.close()
                self.ap_file.unlink()
                self.ap_file = cbin_file
                self.sr = spikeglx.Reader(self.ap_file)

            bin_file = self.shank_info[sh]['lf_file']
            if overwrite:
                cbin_file = bin_file.with_suffix('.cbin')
                if cbin_file.exists():
                    cbin_file.unlink()
            sr_lf = spikeglx.Reader(bin_file)
            cbin_file = sr_lf.compress_file()
            sr_lf.close()
            bin_file.unlink()
            self.shank_info[sh]['lf_file'] = cbin_file
Example #6
    def __init__(self, ap_file, post_check=True, delete_original=False, compress=True):
        """
        :param ap_file: ap.bin spikeglx file to process
        :param post_check: whether to apply post-check integrity test to ensure split content is
         identical to original content (only applicable to NP2.4)
        :param delete_original: whether to delete the original ap file after data has been split
         into shanks (only applicable to NP2.4)
        :param compress: whether to apply mtscomp to extracted .bin files
        """
        self.ap_file = Path(ap_file)
        self.sr = spikeglx.Reader(ap_file)
        self.post_check = post_check
        self.compress = compress
        self.delete_original = delete_original
        self.np_version = spikeglx._get_neuropixel_version_from_meta(self.sr.meta)
        self.check_metadata()
        self.init_params()
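A hedged construction sketch; the class name NP2Converter follows ibllib's NP2 converter and is an assumption here, as is the file path:

conv = NP2Converter('/data/ephysData_g0_t0.imec0.ap.bin',  # hypothetical path
                    post_check=True, delete_original=False, compress=True)
print(conv.np_version)  # neuropixel version parsed from the meta file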
Example #7
def extract_sync(session_path, overwrite=False, ephys_files=None):
    """
    Reads the ephys binary file(s) and extracts sync within the binary file folder.
    Assumes ephys data is within a `raw_ephys_data` folder.

    :param session_path: '/path/to/subject/yyyy-mm-dd/001'
    :param overwrite: Bool, on re-extraction force overwrite instead of loading existing files
    :param ephys_files: optional list of ephys file records, as returned by spikeglx.glob_ephys_files
    :return: list of sync dictionaries, list of output files
    """
    session_path = Path(session_path)
    if not ephys_files:
        ephys_files = spikeglx.glob_ephys_files(session_path)
    syncs = []
    outputs = []
    for efi in ephys_files:
        bin_file = efi.get('ap', efi.get('nidq', None))
        if not bin_file:
            continue
        alfname = dict(object='sync', namespace='spikeglx')
        if efi.label:
            alfname['extra'] = efi.label
        file_exists = alfio.exists(bin_file.parent, **alfname)
        if not overwrite and file_exists:
            _logger.warning(
                f'Skipping raw sync: SGLX sync found for probe {efi.label}!')
            sync = alfio.load_object(bin_file.parent, **alfname)
            out_files, _ = alfio._ls(bin_file.parent, **alfname)
        else:
            sr = spikeglx.Reader(bin_file)
            sync, out_files = _sync_to_alf(sr,
                                           bin_file.parent,
                                           save=True,
                                           parts=efi.label)
        outputs.extend(out_files)
        syncs.append(sync)

    return syncs, outputs
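A minimal call sketch (the session path is made up):

from pathlib import Path

session_path = Path('/data/subject/2021-01-01/001')  # hypothetical session
syncs, outputs = extract_sync(session_path, overwrite=False)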
Example #8
    def _download_raw_partial(self, first_chunk=0, last_chunk=0):
        """
        Downloads one or several chunks of an mtscomp file, copies the .ch file and the
        metadata alongside, and returns a spikeglx.Reader on the result.

        :param first_chunk: index of the first chunk to download
        :param last_chunk: index of the last chunk to download (inclusive)
        :return: spikeglx.Reader of the downloaded chunks; the download directory is kept
         in self.target_dir
        """
        assert str(self.url_cbin).endswith('.cbin')
        webclient = self.one.alyx
        relpath = Path(self.url_cbin.replace(webclient._par.HTTP_DATA_SERVER, '.')).parents[0]
        # write the temp file into a subdirectory
        tdir_chunk = f"chunk_{str(first_chunk).zfill(6)}_to_{str(last_chunk).zfill(6)}"
        target_dir = Path(self.cache_folder, relpath, tdir_chunk)
        self.target_dir = target_dir
        Path(target_dir).mkdir(parents=True, exist_ok=True)
        ch_file_stream = target_dir.joinpath(self.file_chunks.name).with_suffix('.stream.ch')

        # Get the first sample index, and the number of samples to download.
        i0 = self.chunks['chunk_bounds'][first_chunk]
        ns_stream = self.chunks['chunk_bounds'][last_chunk + 1] - i0
        total_samples = self.chunks['chunk_bounds'][-1]

        # handles the meta file
        meta_local_path = ch_file_stream.with_suffix('.meta')
        if not meta_local_path.exists():
            shutil.copy(self.file_chunks.with_suffix('.meta'), meta_local_path)

        # if the cached version happens to be the same as the one on disk, just load it
        if ch_file_stream.exists():
            with open(ch_file_stream, 'r') as f:
                cmeta_stream = json.load(f)
            if (cmeta_stream.get('chopped_first_sample', None) == i0 and
                    cmeta_stream.get('chopped_total_samples', None) == total_samples):
                return spikeglx.Reader(ch_file_stream.with_suffix('.cbin'), ignore_warnings=True)
        else:
            shutil.copy(self.file_chunks, ch_file_stream)
        assert ch_file_stream.exists()

        cmeta = self.chunks.copy()
        # prepare the metadata file: rebase the chunk bounds to the first downloaded sample
        cmeta['chunk_bounds'] = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
        cmeta['chunk_bounds'] = [int(_ - i0) for _ in cmeta['chunk_bounds']]
        assert len(cmeta['chunk_bounds']) >= 2
        assert cmeta['chunk_bounds'][0] == 0

        # rebase the chunk byte offsets to the first downloaded byte
        first_byte = cmeta['chunk_offsets'][first_chunk]
        cmeta['chunk_offsets'] = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
        cmeta['chunk_offsets'] = [_ - first_byte for _ in cmeta['chunk_offsets']]
        assert len(cmeta['chunk_offsets']) >= 2
        assert cmeta['chunk_offsets'][0] == 0
        n_bytes = cmeta['chunk_offsets'][-1]
        assert n_bytes > 0

        # Save the chopped chunk bounds and offsets.
        cmeta['sha1_compressed'] = None
        cmeta['sha1_uncompressed'] = None
        cmeta['chopped'] = True
        cmeta['chopped_first_sample'] = int(i0)
        cmeta['chopped_samples'] = int(ns_stream)
        cmeta['chopped_total_samples'] = int(total_samples)

        with open(ch_file_stream, 'w') as f:
            json.dump(cmeta, f, indent=2, sort_keys=True)

        # Download the requested chunks
        cbin_local_path = webclient.download_file(
            self.url_cbin, chunks=(first_byte, n_bytes),
            target_dir=target_dir, clobber=True, return_md5=False)
        cbin_local_path = remove_uuid_file(cbin_local_path)
        cbin_local_path_renamed = cbin_local_path.with_suffix('.stream.cbin')
        cbin_local_path.replace(cbin_local_path_renamed)
        assert cbin_local_path_renamed.exists()

        reader = spikeglx.Reader(cbin_local_path_renamed, ignore_warnings=True)
        return reader
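To make the chunk bookkeeping concrete, a small sketch of how sample indices map to chunk indices through the `chunk_bounds` array of the .ch file (the numbers are arbitrary):

import numpy as np

chunk_bounds = np.array([0, 30000, 60000, 90000])  # first sample of each chunk, then the total
s0, s1 = 45000, 75000  # hypothetical first and last samples requested
first_chunk = np.searchsorted(chunk_bounds, s0, side='right') - 1  # -> 1
last_chunk = np.searchsorted(chunk_bounds, s1, side='right') - 1  # -> 2
i0 = chunk_bounds[first_chunk]  # first downloaded sample
ns_stream = chunk_bounds[last_chunk + 1] - i0  # number of samples downloaded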
Example #9
def extract_waveforms(ephys_file,
                      ts,
                      ch,
                      t=2.0,
                      sr=30000,
                      n_ch_probe=385,
                      car=True):
    """
    Extracts spike waveforms from binary ephys data file, after (optionally)
    common-average-referencing (CAR) spatial noise.

    Parameters
    ----------
    ephys_file : string
        The file path to the binary ephys data.
    ts : ndarray_like
        The timestamps (in s) of the spikes.
    ch : ndarray_like
        The channels on which to extract the waveforms.
    t : numeric (optional)
        The time (in ms) of each returned waveform.
    sr : int (optional)
        The sampling rate (in Hz) at which the ephys data was acquired.
    n_ch_probe : int (optional)
        The number of channels of the recording.
    car: bool (optional)
        A flag to perform CAR before extracting waveforms.

    Returns
    -------
    waveforms : ndarray
        An array of shape (#spikes, #samples, #channels) containing the waveforms.

    Examples
    --------
    1) Extract all the waveforms for unit1 with and without CAR.
        >>> import numpy as np
        >>> import brainbox as bb
        >>> import alf.io as aio
        >>> import ibllib.ephys.spikes as e_spks
        (Note: if there is no 'alf' directory, create one from the 'ks2' output directory):
        >>> e_spks.ks2_to_alf(path_to_ks_out, path_to_alf_out)
        # Get a clusters bunch and a units bunch from a spikes bunch from an alf directory.
        >>> clstrs_b = aio.load_object(path_to_alf_out, 'clusters')
        >>> spks_b = aio.load_object(path_to_alf_out, 'spikes')
        >>> units_b = bb.processing.get_units_bunch(spks_b, ['times'])
        # Get the timestamps and 20 channels around the max amp channel for unit1, and extract the
        # two sets of waveforms.
        >>> ts = units_b['times']['1']
        >>> max_ch = clstrs_b['channels'][1]
        >>> if max_ch < 10:  # take only channels greater than `max_ch`.
        >>>     ch = np.arange(max_ch, max_ch + 20)
        >>> elif (max_ch + 10) > 385:  # take only channels less than `max_ch`.
        >>>     ch = np.arange(max_ch - 20, max_ch)
        >>> else:  # take `n_c_ch` around `max_ch`.
        >>>     ch = np.arange(max_ch - 10, max_ch + 10)
        >>> wf = bb.io.extract_waveforms(path_to_ephys_file, ts, ch, car=False)
        >>> wf_car = bb.io.extract_waveforms(path_to_ephys_file, ts, ch, car=True)
    """

    # Get memmapped array of `ephys_file`
    with spikeglx.Reader(ephys_file) as s_reader:
        file_m = s_reader.data  # the memmapped array
        n_wf_samples = int(sr / 1000 * (t / 2))  # number of samples on each side of a spike time
        ts_samples = np.array(ts * sr).astype(int)  # the samples corresponding to `ts`
        t_sample_first = ts_samples[0] - n_wf_samples

        # Exception handling for impossible channels
        ch = np.asarray(ch)
        ch = ch.reshape((ch.size, 1)) if ch.size == 1 else ch
        if np.any(ch < 0) or np.any(ch >= n_ch_probe):  # channel indices are 0-based
            raise Exception(
                'At least one specified channel number is impossible. '
                f'The minimum channel number was {np.min(ch)}, '
                f'and the maximum channel number was {np.max(ch)}. '
                'Check specified channel numbers and try again.')

        if car:  # compute spatial noise in chunks
            # see https://github.com/int-brain-lab/iblenv/issues/5
            raise NotImplementedError("CAR option is not available")

        # Initialize `waveforms`, extract waveforms from `file_m`, and CAR.
        waveforms = np.zeros((len(ts), 2 * n_wf_samples, ch.size))
        # Estimate extraction time from the first 5 reads (these are re-extracted below).
        t0 = time.perf_counter()
        for i in range(5):
            waveforms[i, :, :] = \
                file_m[i * n_wf_samples * 2 + t_sample_first:
                       i * n_wf_samples * 2 + t_sample_first + n_wf_samples * 2, ch].reshape(
                           (n_wf_samples * 2, ch.size))
        dt = time.perf_counter() - t0
        print('Performing waveform extraction. Estimated time is {:.2f} mins. ({})'
              .format(dt * len(ts) / 60 / 5, time.ctime()))
        for spk, _ in enumerate(ts):  # extract waveforms
            spk_ts_sample = ts_samples[spk]
            spk_samples = np.arange(spk_ts_sample - n_wf_samples,
                                    spk_ts_sample + n_wf_samples)
            # have to reshape to add an axis to broadcast `file_m` into `waveforms`
            waveforms[spk, :, :] = \
                file_m[spk_samples[0]:spk_samples[-1] + 1, ch].reshape((spk_samples.size, ch.size))
        print('Done. ({})'.format(time.ctime()))

    return waveforms
Example #10
import neurodsp.utils
import spikeglx
import ibllib.io.extractors.ephys_fpga

BATCH_SIZE_SAMPLES = 50000

# full path to the raw ephys
raw_ephys_apfile = (
    '/datadisk/Data/Subjects/ZM_1150/2019-05-07/001/raw_ephys_data/probe_right/'
    'ephysData_g0_t0.imec.ap.bin')
output_path = '/home/olivier/scratch'

# load reader object, and extract sync traces
sr = spikeglx.Reader(raw_ephys_apfile)
sync = ibllib.io.extractors.ephys_fpga._sync_to_alf(sr,
                                                    output_path,
                                                    save=False)

# if the data is needed as well, loop over the file
# raw data contains raw ephys traces, while raw_sync contains the 16 sync traces
wg = neurodsp.utils.WindowGenerator(sr.ns, BATCH_SIZE_SAMPLES, overlap=1)
for first, last in wg.firstlast:
    rawdata, rawsync = sr.read_samples(first, last)
    wg.print_progress()