def compress_NP24(self, overwrite=False, **kwargs):
    """
    Compress the per-shank spikeglx .bin files to mtscomp .cbin files
    :param overwrite: if True, remove any pre-existing .cbin target before compressing
    :return:
    """
    for sh in self.shank_info.keys():
        bin_file = self.shank_info[sh]['ap_file']
        if overwrite:
            cbin_file = bin_file.with_suffix('.cbin')
            if cbin_file.exists():  # guard the unlink so a missing target does not raise
                cbin_file.unlink()
        sr_ap = spikeglx.Reader(bin_file)
        cbin_file = sr_ap.compress_file(**kwargs)
        sr_ap.close()
        bin_file.unlink()
        self.shank_info[sh]['ap_file'] = cbin_file

        bin_file = self.shank_info[sh]['lf_file']
        if overwrite:
            cbin_file = bin_file.with_suffix('.cbin')
            if cbin_file.exists():
                cbin_file.unlink()
        sr_lf = spikeglx.Reader(bin_file)
        cbin_file = sr_lf.compress_file(**kwargs)
        sr_lf.close()
        bin_file.unlink()
        self.shank_info[sh]['lf_file'] = cbin_file
def load_data(self) -> None:
    """
    Load any locally available data.
    """
    # First sanity check
    self._ensure_required_data()

    _logger.info('Gathering data for QC')
    # Load metadata and, if locally present, bin file
    for dstype in ['ap', 'lf']:
        # We already checked that there is not more than one meta file per type
        meta_file = next(self.probe_path.rglob(f'*{dstype}.meta'), None)
        if meta_file is None:
            _logger.warning(f'No {dstype}.meta file in {self.probe_path}, '
                            f'skipping QC for {dstype} data.')
        else:
            self.data[f'{dstype}_meta'] = spikeglx.read_meta_data(meta_file)
            bin_file = next(meta_file.parent.glob(f'*{dstype}.*bin'), None)
            if not bin_file:
                # we only stream the AP file, we won't stream the full LF file...
                if dstype == 'ap':
                    self.data[dstype] = Streamer(pid=self.pid, one=self.one, remove_cached=True)
                else:
                    self.data[dstype] = None
            else:
                self.data[dstype] = spikeglx.Reader(bin_file, open=True)
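# Usage sketch for the loaded data (assumption: the brainbox Streamer mirrors
# the spikeglx.Reader slicing interface, which is what lets downstream QC code
# treat the local-file and streamed cases identically):
#
#   sr = self.data['ap']        # spikeglx.Reader if a local bin exists, else Streamer
#   raw = sr[10000:11000, :-1]  # (samples, channels), last (sync) channel excluded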
def check_NP24(self):
    """
    Check that the splitting into shanks process has completed correctly.
    Compares the original file to the file reconstructed from the individual shanks
    :return:
    """
    for sh in self.shank_info.keys():
        self.shank_info[sh]['sr'] = spikeglx.Reader(self.shank_info[sh]['ap_file'])

    wg = WindowGenerator(self.nsamples, self.samples_window, 0)
    for first, last in wg.firstlast:
        expected = self.sr[first:last, :]
        chunk = np.zeros_like(expected)
        for ish, sh in enumerate(self.shank_info.keys()):
            if ish == 0:
                # the first shank contributes all of its channels, including the
                # common sync channel stored as the last column of each shank file
                chunk[:, self.shank_info[sh]['chns']] = \
                    self.shank_info[sh]['sr'][first:last, :]
            else:
                # the other shanks skip their last (sync) channel so it is written only once
                chunk[:, self.shank_info[sh]['chns'][:-1]] = \
                    self.shank_info[sh]['sr'][first:last, :-1]
        assert np.array_equal(expected, chunk), \
            'data in original file and split files do not match'

    # close the sglx instances once we are done checking
    for sh in self.shank_info.keys():
        sr = self.shank_info[sh].pop('sr')
        sr.close()
    self.check_completed = True
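# A minimal numpy sketch of the reconstruction rule used in check_NP24 (the toy
# arrays and shank layout are illustrative, not the real NP2.4 geometry): every
# shank file carries the shared sync channel as its last column, so only the
# first shank contributes that column when stitching the shanks back together.
import numpy as np

original = np.arange(20).reshape(4, 5)  # 4 samples x (4 data channels + 1 sync channel)
shanks = {
    'shank0': {'chns': np.array([0, 1, 4]), 'data': original[:, [0, 1, 4]]},
    'shank1': {'chns': np.array([2, 3, 4]), 'data': original[:, [2, 3, 4]]},
}
chunk = np.zeros_like(original)
for ish, sh in enumerate(shanks):
    if ish == 0:
        chunk[:, shanks[sh]['chns']] = shanks[sh]['data']
    else:
        chunk[:, shanks[sh]['chns'][:-1]] = shanks[sh]['data'][:, :-1]
assert np.array_equal(original, chunk)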
def _sync_to_alf(raw_ephys_apfile, output_path=None, save=False, parts=''):
    """
    Extracts sync.times, sync.channels and sync.polarities from binary ephys dataset

    :param raw_ephys_apfile: bin file containing ephys data, or a spikeglx.Reader object
    :param output_path: output directory
    :param save: bool write to disk only if True
    :param parts: string or list of strings that will be appended to the filename before extension
    :return:
    """
    # handles input argument: supports spikeglx.Reader, str and pathlib.Path
    if isinstance(raw_ephys_apfile, spikeglx.Reader):
        sr = raw_ephys_apfile
    else:
        raw_ephys_apfile = Path(raw_ephys_apfile)
        sr = spikeglx.Reader(raw_ephys_apfile)
    opened = sr.is_open
    if not opened:  # if not (opened := sr.is_open)  # py3.8
        sr.open()
    # if no output, need a temp folder to swap for big files
    if not output_path:
        output_path = raw_ephys_apfile.parent
    file_ftcp = Path(output_path).joinpath(f'fronts_times_channel_polarity{uuid.uuid4()}.bin')

    # loop over chunks of the raw ephys file
    wg = neurodsp.utils.WindowGenerator(sr.ns, int(SYNC_BATCH_SIZE_SECS * sr.fs), overlap=1)
    fid_ftcp = open(file_ftcp, 'wb')
    for sl in wg.slice:
        ss = sr.read_sync(sl)
        ind, fronts = neurodsp.utils.fronts(ss, axis=0)
        # a = sr.read_sync_analog(sl)
        sav = np.c_[(ind[0, :] + sl.start) / sr.fs, ind[1, :], fronts.astype(np.double)]
        sav.tofile(fid_ftcp)
    # close temp file, read from it and delete
    fid_ftcp.close()
    tim_chan_pol = np.fromfile(str(file_ftcp))
    tim_chan_pol = tim_chan_pol.reshape((int(tim_chan_pol.size / 3), 3))
    file_ftcp.unlink()
    sync = {'times': tim_chan_pol[:, 0],
            'channels': tim_chan_pol[:, 1],
            'polarities': tim_chan_pol[:, 2]}
    # if an already-opened Reader was passed into the function, leave it open
    if not opened:
        sr.close()
    if save:
        out_files = alfio.save_object_npy(output_path, sync, 'sync',
                                          namespace='spikeglx', parts=parts)
        return Bunch(sync), out_files
    else:
        return Bunch(sync)
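# Minimal usage sketch for _sync_to_alf (the path is hypothetical): extract the
# sync fronts from an ap.bin file and persist them as ALF npy files next to it.
sync, out_files = _sync_to_alf(
    '/data/raw_ephys_data/probe00/_spikeglx_ephysData_g0_t0.imec0.ap.bin',
    save=True, parts='probe00')
print(sync.times[:5], sync.channels[:5], sync.polarities[:5])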
def compress_NP21(self, overwrite=False):
    """
    Compress the common ap file and the per-shank lf files to mtscomp .cbin files
    :param overwrite: if True, remove any pre-existing lf .cbin target before compressing
    :return:
    """
    for sh in self.shank_info.keys():
        # for NP2.1 the ap file is shared across shanks, so it is only compressed once
        if not self.sr.is_mtscomp:
            cbin_file = self.sr.compress_file()
            self.sr.close()
            self.ap_file.unlink()
            self.ap_file = cbin_file
            self.sr = spikeglx.Reader(self.ap_file)

        bin_file = self.shank_info[sh]['lf_file']
        if overwrite:
            cbin_file = bin_file.with_suffix('.cbin')
            if cbin_file.exists():  # guard the unlink so a missing target does not raise
                cbin_file.unlink()
        sr_lf = spikeglx.Reader(bin_file)
        cbin_file = sr_lf.compress_file()
        sr_lf.close()
        bin_file.unlink()
        self.shank_info[sh]['lf_file'] = cbin_file
def __init__(self, ap_file, post_check=True, delete_original=False, compress=True):
    """
    :param ap_file: ap.bin spikeglx file to process
    :param post_check: whether to apply a post-check integrity test to ensure the split
     content is identical to the original content (only applicable to NP2.4)
    :param delete_original: whether to delete the original ap file after the data has been split
    :param compress: whether to apply mtscomp to the extracted .bin files split into shanks
     (only applicable to NP2.4)
    """
    self.ap_file = Path(ap_file)
    self.sr = spikeglx.Reader(ap_file)
    self.post_check = post_check
    self.compress = compress
    self.delete_original = delete_original
    self.np_version = spikeglx._get_neuropixel_version_from_meta(self.sr.meta)
    self.check_metadata()
    self.init_params()
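# Usage sketch for the converter class this __init__ belongs to. In ibllib the
# class is named NP2Converter and exposes a process() entry point, but treat
# both names as assumptions here; the path is hypothetical.
converter = NP2Converter('/data/raw_ephys_data/probe00/_spikeglx_ephysData_g0_t0.imec0.ap.bin',
                         post_check=True, delete_original=False, compress=True)
converter.process()  # dispatches on self.np_version (NP2.1 vs NP2.4) set above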
def extract_sync(session_path, overwrite=False, ephys_files=None):
    """
    Reads the ephys binary file(s) and extracts the sync within the binary file folder.
    Assumes ephys data is within a `raw_ephys_data` folder

    :param session_path: '/path/to/subject/yyyy-mm-dd/001'
    :param overwrite: Bool on re-extraction, forces overwrite instead of loading existing files
    :param ephys_files: list of ephys file records as returned by spikeglx.glob_ephys_files;
     if None, they are globbed from the session path
    :return: list of sync dictionaries, list of output files
    """
    session_path = Path(session_path)
    if not ephys_files:
        ephys_files = spikeglx.glob_ephys_files(session_path)
    syncs = []
    outputs = []
    for efi in ephys_files:
        bin_file = efi.get('ap', efi.get('nidq', None))
        if not bin_file:
            continue
        alfname = dict(object='sync', namespace='spikeglx')
        if efi.label:
            alfname['extra'] = efi.label
        file_exists = alfio.exists(bin_file.parent, **alfname)
        if not overwrite and file_exists:
            _logger.warning(f'Skipping raw sync: SGLX sync found for probe {efi.label}!')
            sync = alfio.load_object(bin_file.parent, **alfname)
            out_files, _ = alfio._ls(bin_file.parent, **alfname)
        else:
            sr = spikeglx.Reader(bin_file)
            sync, out_files = _sync_to_alf(sr, bin_file.parent, save=True, parts=efi.label)
        outputs.extend(out_files)
        syncs.append(sync)

    return syncs, outputs
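# Minimal usage sketch for extract_sync (the session path is hypothetical):
# extracts and saves sync pulses for every ap/nidq binary found under the
# session's raw_ephys_data folder, re-using existing ALF files when present.
syncs, out_files = extract_sync('/data/Subjects/ZM_1150/2019-05-07/001')
for sync in syncs:
    print(f'{sync.times.size} sync fronts found')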
def _download_raw_partial(self, first_chunk=0, last_chunk=0):
    """
    Downloads one or several chunks of an mtscomp file, copies the ch file and the metadata,
    and returns a spikeglx.Reader on the result

    :param first_chunk: index of the first chunk to download
    :param last_chunk: index of the last chunk to download
    :return: spikeglx.Reader of the current chunk; the directory where it is stored
     is kept in self.target_dir
    """
    assert str(self.url_cbin).endswith('.cbin')
    webclient = self.one.alyx
    relpath = Path(self.url_cbin.replace(webclient._par.HTTP_DATA_SERVER, '.')).parents[0]
    # write the temp file into a subdirectory
    tdir_chunk = f"chunk_{str(first_chunk).zfill(6)}_to_{str(last_chunk).zfill(6)}"
    target_dir = Path(self.cache_folder, relpath, tdir_chunk)
    self.target_dir = target_dir
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    ch_file_stream = target_dir.joinpath(self.file_chunks.name).with_suffix('.stream.ch')

    # Get the first sample index, and the number of samples to download.
    i0 = self.chunks['chunk_bounds'][first_chunk]
    ns_stream = self.chunks['chunk_bounds'][last_chunk + 1] - i0
    total_samples = self.chunks['chunk_bounds'][-1]

    # handles the meta file
    meta_local_path = ch_file_stream.with_suffix('.meta')
    if not meta_local_path.exists():
        shutil.copy(self.file_chunks.with_suffix('.meta'), meta_local_path)

    # if the cached version happens to be the same as the one on disk, just load it
    if ch_file_stream.exists():
        with open(ch_file_stream, 'r') as f:
            cmeta_stream = json.load(f)
        if (cmeta_stream.get('chopped_first_sample', None) == i0 and
                cmeta_stream.get('chopped_total_samples', None) == total_samples):
            return spikeglx.Reader(ch_file_stream.with_suffix('.cbin'), ignore_warnings=True)
    else:
        shutil.copy(self.file_chunks, ch_file_stream)
    assert ch_file_stream.exists()

    # prepare the metadata file
    cmeta = self.chunks.copy()
    cmeta['chunk_bounds'] = cmeta['chunk_bounds'][first_chunk:last_chunk + 2]
    cmeta['chunk_bounds'] = [int(_ - i0) for _ in cmeta['chunk_bounds']]
    assert len(cmeta['chunk_bounds']) >= 2
    assert cmeta['chunk_bounds'][0] == 0

    first_byte = cmeta['chunk_offsets'][first_chunk]
    cmeta['chunk_offsets'] = cmeta['chunk_offsets'][first_chunk:last_chunk + 2]
    cmeta['chunk_offsets'] = [_ - first_byte for _ in cmeta['chunk_offsets']]
    assert len(cmeta['chunk_offsets']) >= 2
    assert cmeta['chunk_offsets'][0] == 0
    n_bytes = cmeta['chunk_offsets'][-1]
    assert n_bytes > 0

    # Save the chopped chunk bounds and offsets.
    cmeta['sha1_compressed'] = None
    cmeta['sha1_uncompressed'] = None
    cmeta['chopped'] = True
    cmeta['chopped_first_sample'] = int(i0)
    cmeta['chopped_samples'] = int(ns_stream)
    cmeta['chopped_total_samples'] = int(total_samples)

    with open(ch_file_stream, 'w') as f:
        json.dump(cmeta, f, indent=2, sort_keys=True)

    # Download the requested chunks
    cbin_local_path = webclient.download_file(
        self.url_cbin, chunks=(first_byte, n_bytes),
        target_dir=target_dir, clobber=True, return_md5=False)
    cbin_local_path = remove_uuid_file(cbin_local_path)
    cbin_local_path_renamed = cbin_local_path.with_suffix('.stream.cbin')
    cbin_local_path.replace(cbin_local_path_renamed)
    assert cbin_local_path_renamed.exists()

    reader = spikeglx.Reader(cbin_local_path_renamed, ignore_warnings=True)
    return reader
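# How a caller would typically map a sample interval onto the (first_chunk,
# last_chunk) arguments of _download_raw_partial. This helper is a sketch (the
# function name is ours), assuming chunk_bounds holds the cumulative sample
# counts from the mtscomp .ch file, as self.chunks['chunk_bounds'] does above:
import numpy as np

def samples_to_chunks(chunk_bounds, first_sample, last_sample):
    # chunk i covers samples [chunk_bounds[i], chunk_bounds[i + 1])
    first_chunk = int(np.searchsorted(chunk_bounds, first_sample, side='right') - 1)
    last_chunk = int(np.searchsorted(chunk_bounds, last_sample, side='right') - 1)
    return first_chunk, last_chunk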
def extract_waveforms(ephys_file, ts, ch, t=2.0, sr=30000, n_ch_probe=385, car=True):
    """
    Extracts spike waveforms from a binary ephys data file, after (optionally)
    common-average-referencing (CAR) spatial noise.

    Parameters
    ----------
    ephys_file : string
        The file path to the binary ephys data.
    ts : ndarray_like
        The timestamps (in s) of the spikes.
    ch : ndarray_like
        The channels on which to extract the waveforms.
    t : numeric (optional)
        The time (in ms) of each returned waveform.
    sr : int (optional)
        The sampling rate (in hz) that the ephys data was acquired at.
    n_ch_probe : int (optional)
        The number of channels of the recording.
    car : bool (optional)
        A flag to perform CAR before extracting waveforms.

    Returns
    -------
    waveforms : ndarray
        An array of shape (#spikes, #samples, #channels) containing the waveforms.

    Examples
    --------
    1) Extract all the waveforms for unit1 with and without CAR.
        >>> import numpy as np
        >>> import brainbox as bb
        >>> import alf.io as aio
        >>> import ibllib.ephys.spikes as e_spks
        (*Note, if there is no 'alf' directory, make 'alf' directory from 'ks2' output directory):
        >>> e_spks.ks2_to_alf(path_to_ks_out, path_to_alf_out)
        # Get a clusters bunch and a units bunch from a spikes bunch from an alf directory.
        >>> clstrs_b = aio.load_object(path_to_alf_out, 'clusters')
        >>> spks_b = aio.load_object(path_to_alf_out, 'spikes')
        >>> units_b = bb.processing.get_units_bunch(spks_b, ['times'])
        # Get the timestamps and 20 channels around the max amp channel for unit1, and extract
        # the two sets of waveforms.
        >>> ts = units_b['times']['1']
        >>> max_ch = clstrs_b['channels'][1]
        >>> if max_ch < 10:  # take only channels greater than `max_ch`.
        >>>     ch = np.arange(max_ch, max_ch + 20)
        >>> elif (max_ch + 10) > 385:  # take only channels less than `max_ch`.
        >>>     ch = np.arange(max_ch - 20, max_ch)
        >>> else:  # take 10 channels on each side of `max_ch`.
        >>>     ch = np.arange(max_ch - 10, max_ch + 10)
        >>> wf = bb.io.extract_waveforms(path_to_ephys_file, ts, ch, car=False)
        >>> wf_car = bb.io.extract_waveforms(path_to_ephys_file, ts, ch, car=True)
    """
    # Get memmapped array of `ephys_file`
    with spikeglx.Reader(ephys_file) as s_reader:
        file_m = s_reader.data  # the memmapped array
        n_wf_samples = int(sr / 1000 * (t / 2))  # number of samples to return on each side of a ts
        ts_samples = np.array(ts * sr).astype(int)  # the samples corresponding to `ts`
        t_sample_first = ts_samples[0] - n_wf_samples

        # Exception handling for impossible channels
        ch = np.asarray(ch)
        ch = ch.reshape((ch.size, 1)) if ch.size == 1 else ch
        if np.any(ch < 0) or np.any(ch >= n_ch_probe):
            raise Exception('At least one specified channel number is impossible. '
                            f'The minimum channel number was {np.min(ch)}, '
                            f'and the maximum channel number was {np.max(ch)}. '
                            'Check specified channel numbers and try again.')

        if car:  # compute spatial noise in chunks
            # see https://github.com/int-brain-lab/iblenv/issues/5
            raise NotImplementedError("CAR option is not available")

        # Initialize `waveforms` and extract waveforms from `file_m`.
        waveforms = np.zeros((len(ts), 2 * n_wf_samples, ch.size))
        # Give time estimate for extracting waveforms by timing the first five extractions.
        t0 = time.perf_counter()
        for i in range(5):
            waveforms[i, :, :] = \
                file_m[i * n_wf_samples * 2 + t_sample_first:
                       i * n_wf_samples * 2 + t_sample_first + n_wf_samples * 2, ch].reshape(
                    (n_wf_samples * 2, ch.size))
        dt = time.perf_counter() - t0
        print('Performing waveform extraction. Estimated time is {:.2f} mins. ({})'
              .format(dt * len(ts) / 60 / 5, time.ctime()))
        for spk, _ in enumerate(ts):  # extract waveforms
            spk_ts_sample = ts_samples[spk]
            spk_samples = np.arange(spk_ts_sample - n_wf_samples, spk_ts_sample + n_wf_samples)
            # have to reshape to add an axis to broadcast `file_m` into `waveforms`
            waveforms[spk, :, :] = \
                file_m[spk_samples[0]:spk_samples[-1] + 1, ch].reshape(
                    (spk_samples.size, ch.size))
        print('Done. ({})'.format(time.ctime()))
        return waveforms
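# The CAR branch above deliberately raises NotImplementedError. As a hedged
# sketch only (an assumption, not brainbox's method), one common referencing
# scheme subtracts the per-sample median across channels, applied chunk by
# chunk so the memmapped file never has to be loaded whole:
import numpy as np

def car_median(chunk):
    # chunk: (n_samples, n_channels) slice of the raw memmap
    return chunk - np.median(chunk, axis=1, keepdims=True)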
import neurodsp.utils
import spikeglx

import ibllib.io.extractors.ephys_fpga

BATCH_SIZE_SAMPLES = 50000

# full path to the raw ephys file
raw_ephys_apfile = ('/datadisk/Data/Subjects/ZM_1150/2019-05-07/001/raw_ephys_data/probe_right/'
                    'ephysData_g0_t0.imec.ap.bin')
output_path = '/home/olivier/scratch'

# load reader object, and extract sync traces
sr = spikeglx.Reader(raw_ephys_apfile)
sync = ibllib.io.extractors.ephys_fpga._sync_to_alf(sr, output_path, save=False)

# if the data is needed as well, loop over the file
# rawdata contains the raw ephys traces, while rawsync contains the 16 sync traces
wg = neurodsp.utils.WindowGenerator(sr.ns, BATCH_SIZE_SAMPLES, overlap=1)
for first, last in wg.firstlast:
    rawdata, rawsync = sr.read_samples(first, last)
    wg.print_progress()