def peak_to_peak_amp(ephys_file, samp_inds, nsamps):
    """
    Mean peak-to-peak amplitude, measured across channels, at randomly drawn samples.

    For each drawn sample index, one row of the raw data is read, its trailing entry is
    dropped, the median of the remaining values is subtracted and the peak-to-peak
    (max - min) value is taken.

    :param ephys_file: raw ephys binary file, passed to `spikeglx.Reader`
    :param samp_inds: array-like of candidate sample indices (e.g. spike samples)
    :param nsamps: number of indices drawn from `samp_inds` (drawn with replacement, which is
     the `np.random.choice` default)
    :return: mean of the peak-to-peak amplitudes over the drawn samples
    """
    # read raw ephys file
    sr = spikeglx.Reader(ephys_file)
    # take a random subset (nsamps) of the spike samples
    samples = np.random.choice(samp_inds, nsamps)
    amps = np.zeros(len(samples))
    for cnt, i in enumerate(samples):
        # drop the trailing entry of the row (presumably the sync channel - TODO confirm)
        wf = sr.data[int(i)][:-1]
        wf_baseline = wf - np.median(wf)  # subtract median baseline
        amps[cnt] = np.max(wf_baseline) - np.min(wf_baseline)
    return np.mean(amps)
def extract_sync(session_path, overwrite=False, ephys_files=None):
    """
    Reads ephys binary file (s) and extract sync within the binary file folder
    Assumes ephys data is within a `raw_ephys_data` folder

    :param session_path: '/path/to/subject/yyyy-mm-dd/001'
    :param overwrite: Bool on re-extraction, forces overwrite instead of loading existing files
    :param ephys_files: optional pre-computed output of `spikeglx.glob_ephys_files`; globbed
     from `session_path` when empty or None
    :return: tuple (list of sync objects, list of the corresponding output files)
    """
    session_path = Path(session_path)
    ephys_files = ephys_files or spikeglx.glob_ephys_files(session_path)
    syncs, outputs = [], []
    for efi in ephys_files:
        # the ap file takes precedence, the nidq file is the fallback
        bin_file = efi.get('ap', efi.get('nidq', None))
        if not bin_file:
            continue
        folder = bin_file.parent
        alfname = dict(object='sync', namespace='spikeglx')
        if efi.label:
            alfname['extra'] = efi.label
        already_extracted = alfio.exists(folder, **alfname)
        if overwrite or not already_extracted:
            sr = spikeglx.Reader(bin_file)
            sync, out_files = _sync_to_alf(sr, folder, save=True, parts=efi.label)
        else:
            _logger.warning(f'Skipping raw sync: SGLX sync found for probe {efi.label} !')
            sync = alfio.load_object(folder, **alfname)
            out_files, _ = alfio._ls(folder, **alfname)
        outputs.extend(out_files)
        syncs.append(sync)
    return syncs, outputs
def extract_rmsmap(fbin, out_folder=None, force=False):
    """
    Wrapper for rmsmap that outputs _iblqc_ephysTimeRms and _iblqc_ephysSpectralDensity ALF
    objects (suffixed with the upper-cased reader type)

    :param fbin: binary file in spike glx format (will look for attached metatdata)
    :param out_folder: folder in which to store output ALF files. Default uses the folder in which
     the `fbin` file lives.
    :param force: if False (default), the computation is skipped when both ALF objects already
     exist in `out_folder`; if True they are always re-computed
    :return: None when the extraction is skipped, otherwise the concatenated outputs of the two
     `alf.io.save_object_npy` calls (time domain first, then frequency domain)
    """
    _logger.info(str(fbin))
    sglx = spikeglx.Reader(fbin)
    # check if output ALF files exist already:
    if out_folder is None:
        out_folder = Path(fbin).parent
    else:
        out_folder = Path(out_folder)
    alf_object_time = f'_iblqc_ephysTimeRms{sglx.type.upper()}'
    alf_object_freq = f'_iblqc_ephysSpectralDensity{sglx.type.upper()}'
    if alf.io.exists(out_folder, alf_object_time) and \
            alf.io.exists(out_folder, alf_object_freq) and not force:
        # `fbin` may be a plain string: go through Path to get the file name
        _logger.warning(f'{Path(fbin).name} QC already exists, skipping. Use force option to override')
        return
    # crunch numbers
    rms = rmsmap(fbin)
    # output ALF files, single precision
    out_folder.mkdir(parents=True, exist_ok=True)
    tdict = {'rms': rms['TRMS'].astype(np.single),
             'timestamps': rms['tscale'].astype(np.single)}
    fdict = {'power': rms['spectral_density'].astype(np.single),
             'freqs': rms['fscale'].astype(np.single)}
    out_time = alf.io.save_object_npy(out_folder, object=alf_object_time, dico=tdict)
    out_freq = alf.io.save_object_npy(out_folder, object=alf_object_freq, dico=fdict)
    return out_time + out_freq
def _run(self):
    """
    Compress ephys files looking for `compress_ephys.flag` whithin the probes folder
    Original bin file will be removed
    The registration flag created contains targeted file names at the root of the session

    :return: list of files: compressed binaries with their `.ch` companions, plus every other
     file found (already compressed binaries, ch and meta files) unchanged
    """
    ephys_files = spikeglx.glob_ephys_files(self.session_path)
    for extension in ("ch", "meta"):
        ephys_files += spikeglx.glob_ephys_files(self.session_path, ext=extension)

    out_files = []
    for ef in ephys_files:
        for typ in ["ap", "lf", "nidq"]:
            bin_file = ef.get(typ)
            if not bin_file:
                continue
            if not bin_file.suffix.startswith(".bin"):
                # not a raw binary (e.g. .cbin, .ch, .meta): listed as is
                out_files.append(bin_file)
                continue
            sr = spikeglx.Reader(bin_file)
            if sr.is_mtscomp:
                out_files.append(bin_file)
                continue
            _logger.info(f"Compressing binary file {bin_file}")
            out_files.append(sr.compress_file(keep_original=False))
            out_files.append(bin_file.with_suffix('.ch'))
    return out_files
def extract_rmsmap(fbin, out_folder=None, force=False):
    """
    Wrapper for rmsmap that outputs _ibl_ephysRmsMap and _ibl_ephysSpectra ALF files

    :param fbin: binary file in spike glx format (will look for attached metatdata)
    :param out_folder: folder in which to store output ALF files. By default a folder named
     `qc_ephys_` followed by the part of the `fbin` file name before its first dot is used, at
     the same level as the `fbin` file
    :param force: if False (default), nothing is computed when all output files already exist
    :return: None
    """
    logger_.info(str(fbin))
    sglx = spikeglx.Reader(fbin)
    # check if output ALF files exist already:
    if out_folder is None:
        out_folder = Path(fbin).parent / ('qc_ephys_' + Path(fbin).name.split('.')[0])
    else:
        out_folder = Path(out_folder)
    # NB: the rms file name used to contain a double dot ('.rms..npy'), which was a typo
    files = {'rms': out_folder / ('_ibl_ephysRmsMap_' + sglx.type + '.rms.npy'),
             'times': out_folder / ('_ibl_ephysRmsMap_' + sglx.type + '.times.npy'),
             'power': out_folder / ('_ibl_ephysSpectra_' + sglx.type + '.power.npy'),
             'frequencies': out_folder / ('_ibl_ephysSpectra_' + sglx.type + '.frequencies.npy')}
    # if they do and the option Force is set to false, do not recompute and exit
    if not force and all(f.exists() for f in files.values()):
        logger_.warning('Output exists. Skipping ' + str(fbin) + ' Use force option to override')
        return
    # crunch numbers
    rms = rmsmap(fbin)
    # output ALF files, single precision
    out_folder.mkdir(parents=True, exist_ok=True)
    np.save(file=files['rms'], arr=rms['TRMS'].astype(np.single))
    np.save(file=files['times'], arr=rms['tscale'].astype(np.single))
    np.save(file=files['power'], arr=rms['spectral_density'].astype(np.single))
    np.save(file=files['frequencies'], arr=rms['fscale'].astype(np.single))
def compress_ephys(root_data_folder, dry=False, max_sessions=5):
    """
    Compress ephys files looking for `compress_ephys.flag` whithin the probes folder
    Original bin file will be removed
    The registration flag created contains targeted file names at the root of the session

    :param root_data_folder: folder searched recursively for `compress_ephys.flag` files
    :param dry: if True, only prints the folders that would be processed
    :param max_sessions: maximum number of flag files handled in one call
    """
    flag_files = Path(root_data_folder).rglob('compress_ephys.flag')
    for count, qcflag in enumerate(flag_files, start=1):
        if count > max_sessions:
            return
        probe_path = qcflag.parent
        if dry:
            print(probe_path)
            continue
        # no rglob: only the folder in which the flag is located gets searched
        out_files = []
        for ef in spikeglx.glob_ephys_files(probe_path, recursive=False):
            for bin_file in filter(None, map(ef.get, ['ap', 'lf', 'nidq'])):
                sr = spikeglx.Reader(bin_file)
                if sr.is_mtscomp:
                    continue
                out_files.append(sr.compress_file(keep_original=False))
        qcflag.unlink()
        if not out_files:
            continue
        # the registration flag lists the new files relative to the session folder
        session_path = probe_path.parents[1]
        file_list = [str(f.relative_to(session_path)) for f in out_files]
        flags.write_flag_file(probe_path.joinpath('register_me.flag'), file_list=file_list)
def align_data(eid, one, trial_idx, probe):
    """
    Extract the raw data of one trial from the compressed ap file of a probe, together with
    the interpolated time of each extracted sample.

    :param eid: session identifier passed to `one.load`
    :param one: ONE instance used to download the datasets
    :param trial_idx: index of the trial within `trials['intervals']`
    :param probe: string that must appear in the path of the ephys file to use (e.g. probe label)
    :return: tuple (data, times_data): the data slice with its global mean removed and the last
     column dropped, and the interpolated times of the corresponding samples
    """
    # change .ap to .lf to get LFP instead of high frequency band
    # D['c607c5da-534e-4f30-97b3-b1d3e904e9fd']['probe01'] has Visp1, VISp2/3 and VISp4'
    # '3663d82b-f197-4e8b-b299-7b803a155b84', 'left', [8,8], lick example
    lf_paths = one.load(eid, dataset_types=[
        'ephysData.raw.meta', 'ephysData.raw.ch', 'ephysData.raw.sync',
        'trials.intervals', 'trials.stimOn_times', 'trials.feedbackType',
        'trials.goCue_times', 'trials.feedback_times', 'trials.contrastLeft',
        'trials.contrastRight'
    ], download_only=True)  # 'ephysData.raw.ap',
    # NOTE(review): despite the `lf_` naming, the file selected here is the `ap.cbin` file
    lf_file = [x for x in lf_paths if probe in str(x) and 'ap.cbin' in str(x)][0]
    sr = spikeglx.Reader(lf_file)
    # the sync file sits next to the binary file, '.ap' being replaced by '.sync.npy' in the stem
    sync_file = sr.file_bin.parent.joinpath(
        sr.file_bin.stem.replace('.ap', '.sync.npy'))
    sync = np.load(sync_file)
    # mean spacing between consecutive entries of the first sync column, truncated to an integer
    fs_sync = int(np.mean(np.diff(sync[:, 0])))  # sampled at 20 Hz?
    # upsample sync signal to sr: linear interpolation from (first column * fs) to second column
    sample2time = scipy.interpolate.interp1d(sync[:, 0] * sr.fs, sync[:, 1])
    alf_path = [x for x in lf_paths if 'alf' in str(x)][0].parent
    trials = alf.io.load_object(alf_path, '_ibl_trials')
    # digitize to search idx only in small chunk
    times_to_align_to = trials['intervals'][:, 0]
    binids = np.digitize(times_to_align_to, sync[:, 1])
    # get lfp aligned for specific trial (trial_idx)
    t = trials['intervals'][:, 0][trial_idx]
    # interpolated times of every sample within the sync bin that contains the trial start
    times = sample2time(
        np.arange((binids[trial_idx] - 1) * fs_sync * sr.fs,
                  binids[trial_idx] * fs_sync * sr.fs))
    # presumably the index of the value of `times` closest to `t` - verify against `find_nearest`
    lfp_index = find_nearest(times, t)
    startx = int(
        lfp_index + (binids[trial_idx] - 1) * fs_sync * sr.fs)  # in observations, 2500 Hz
    print(startx)
    t_end = trials['intervals'][:, 1][trial_idx]
    # first and last sample index of the trial: start sample plus the trial duration in samples
    data_bounds = [int(startx), int((t_end - t) * sr.fs) + int(startx)]  # in lfp frame idx
    print(data_bounds)
    # all columns but the last one
    data = sr[data_bounds[0]:data_bounds[1], :-1]
    times_data = sample2time(np.arange(data_bounds[0], data_bounds[1]))
    # remove the mean computed over the whole slice (all samples and channels together)
    data = data - np.mean(data)
    return data, times_data
def _run(self):
    """
    Runs for initiated PID: gets one second of raw data (streamed, or read from the local ap
    file when running on the server), detects bad channels and produces the snapshot files.

    :return: list of output files; the existing ones if 4 snapshots are already present, an
     empty list if no local ap file is found on the server
    """
    assert self.pid
    self.eqcs = []
    T0 = 60 * 30
    SNAPSHOT_LABEL = "raw_ephys_bad_channels"
    output_files = list(self.output_directory.glob(f'{SNAPSHOT_LABEL}*'))
    if len(output_files) == 4:
        return output_files
    self.output_directory.mkdir(exist_ok=True, parents=True)

    if self.location == 'server':
        # local data: read one second of the ap file starting at T0
        electrodes = None
        probe_folder = self.session_path.joinpath('raw_ephys_data', self.pname)
        ap_file = next(probe_folder.glob('*ap.*bin'), None)
        if ap_file is None:
            return []
        sr = spikeglx.Reader(ap_file)
        raw = sr[int((sr.fs * T0)):int((sr.fs * (T0 + 1))), :-sr.nsync].T
    else:
        # remote data: fetch the electrode sites and stream one second starting at T0
        self.histology_status = self.get_histology_status()
        electrodes = self.get_channels('electrodeSites', f'alf/{self.pname}')
        if 'atlas_id' in electrodes.keys():
            region_index = ismember(electrodes['atlas_id'], self.brain_regions.id)[1]
            electrodes['ibr'] = region_index
            electrodes['acronym'] = self.brain_regions.acronym[electrodes['ibr']]
            electrodes['name'] = self.brain_regions.name[electrodes['ibr']]
            electrodes['title'] = self.histology_status
        else:
            electrodes = None
        sr, _ = stream(self.pid, T0, nsecs=1, one=self.one)
        raw = sr[:, :-sr.nsync].T

    channel_labels, channel_features = voltage.detect_bad_channels(raw, sr.fs)
    _, eqcs, output_files = ephys_bad_channels(
        raw=raw,
        fs=sr.fs,
        channel_labels=channel_labels,
        channel_features=channel_features,
        channels=electrodes,
        title=SNAPSHOT_LABEL,
        destripe=True,
        save_dir=self.output_directory,
        br=self.brain_regions,
        pid_info=self.pid_label)
    self.eqcs = eqcs
    return output_files
def setUp(self):
    """Creates a mock spikeglx ap binary file in a temporary folder and opens a Reader on it"""
    self._tempdir = tempfile.TemporaryDirectory()
    # self.addClassCleanup(self._tempdir.cleanup)  # py3.8
    self.workdir = Path(self._tempdir.name)
    fixtures_folder = Path(__file__).parent.joinpath('fixtures', 'io', 'spikeglx')
    file_meta = fixtures_folder.joinpath('sample3A_short_g0_t0.imec.ap.meta')
    mock_file = spikeglx._mock_spikeglx_file(
        self.workdir.joinpath('sample3A_short_g0_t0.imec.ap.bin'),
        file_meta,
        ns=76104,
        nc=385,
        sync_depth=16,
        random=True)
    self.file_bin = mock_file['bin_file']
    self.sr = spikeglx.Reader(self.file_bin)
def test_compress_session(self):
    """Runs the compression task on the session and checks each ap file was replaced by a cbin"""
    EphysMtscomp(self.main_folder).run()
    for ef in spikeglx.glob_ephys_files(self.main_folder):
        ap_file = ef.ap
        # there is only one compressed file afterwards
        self.assertTrue(ap_file.suffix == '.cbin')
        uncompressed_file = ap_file.with_suffix('.bin')
        self.assertFalse(uncompressed_file.exists())
        # the compressed file is readable
        reader = spikeglx.Reader(ap_file)
        self.assertTrue(reader.is_mtscomp)
def rmsmap(fbin, spectra=True):
    """
    Computes RMS map in time domain and spectra for each channel of Neuropixel probe

    :param fbin: binary file in spike glx format (will look for attached metatdata), or an
     already instantiated `spikeglx.Reader`
    :type fbin: str or pathlib.Path or spikeglx.Reader
    :param spectra: whether to compute the power spectrum (only need for lfp data)
    :type: bool
    :return: a dictionary with amplitudes in channeltime space ('TRMS'), channelfrequency space
     ('spectral_density', summed over the windows), the number of samples per window
     ('nsamples'), time ('tscale') and frequency ('fscale') scales
    """
    # a Reader instance is used as is, anything else is opened as a binary file
    if isinstance(fbin, spikeglx.Reader):
        sglx = fbin
    else:
        sglx = spikeglx.Reader(fbin)
    rms_win_length_samples = 2**np.ceil(np.log2(sglx.fs * RMS_WIN_LENGTH_SECS))
    # the window generator will generates window indices
    wingen = dsp.WindowGenerator(ns=sglx.ns, nswin=rms_win_length_samples, overlap=0)
    # pre-allocate output dictionary of numpy arrays
    win = {
        'TRMS': np.zeros((wingen.nwin, sglx.nc)),
        'nsamples': np.zeros((wingen.nwin, )),
        'fscale': dsp.fscale(WELCH_WIN_LENGTH_SAMPLES, 1 / sglx.fs, one_sided=True),
        'tscale': wingen.tscale(fs=sglx.fs)
    }
    win['spectral_density'] = np.zeros((len(win['fscale']), sglx.nc))
    # loop through the whole session
    for first, last in wingen.firstlast:
        D = sglx.read_samples(first_sample=first, last_sample=last)[0].transpose()
        # remove low frequency noise below 1 Hz
        D = dsp.hp(D, 1 / sglx.fs, [0, 1])
        iw = wingen.iw
        win['TRMS'][iw, :] = dsp.rms(D)
        win['nsamples'][iw] = D.shape[1]
        if spectra:
            # the last window may be smaller than what is needed for welch
            if last - first < WELCH_WIN_LENGTH_SAMPLES:
                continue
            # compute a smoothed spectrum using welch method
            # ('hann' is the window name accepted by all scipy versions, the 'hanning' alias
            # has been removed from recent releases)
            _, w = signal.welch(D, fs=sglx.fs, window='hann',
                                nperseg=WELCH_WIN_LENGTH_SAMPLES,
                                detrend='constant', return_onesided=True,
                                scaling='density', axis=-1)
            win['spectral_density'] += w.T
        # print at least every 20 windows
        if (iw % min(20, max(int(np.floor(wingen.nwin / 75)), 1))) == 0:
            print_progress(iw, wingen.nwin)
    return win
def _sync_to_alf(raw_ephys_apfile, output_path=None, save=False, parts=''):
    """
    Extracts sync.times, sync.channels and sync.polarities from binary ephys dataset

    :param raw_ephys_apfile: bin file containing ephys data or spike (str, pathlib.Path or an
     already instantiated spikeglx.Reader)
    :param output_path: output directory, defaults to the folder of the binary file
    :param save: bool write to disk only if True
    :param parts: string or list of strings that will be appended to the filename before extension
    :return: if `save` is True a tuple (Bunch of sync arrays, list of output files), otherwise
     only the Bunch of sync arrays
    """
    # handles input argument: support ibllib.io.spikeglx.Reader, str and pathlib.Path
    if isinstance(raw_ephys_apfile, spikeglx.Reader):
        sr = raw_ephys_apfile
        bin_file = sr.file_bin
    else:
        raw_ephys_apfile = Path(raw_ephys_apfile)
        sr = spikeglx.Reader(raw_ephys_apfile)
        bin_file = raw_ephys_apfile
    # if no output, need a temp folder to swap for big files
    if not output_path:
        # taken from the binary file path so that a Reader input works too
        output_path = Path(bin_file).parent
    file_ftcp = Path(output_path).joinpath(
        f'fronts_times_channel_polarity{str(uuid.uuid4())}.bin')

    # loop over chunks of the raw ephys file
    wg = dsp.WindowGenerator(sr.ns, int(SYNC_BATCH_SIZE_SECS * sr.fs), overlap=1)
    try:
        with open(file_ftcp, 'wb') as fid_ftcp:
            for sl in wg.slice:
                ss = sr.read_sync(sl)
                ind, fronts = dsp.fronts(ss, axis=0)
                # one row per front: time in seconds, channel, polarity
                sav = np.c_[(ind[0, :] + sl.start) / sr.fs, ind[1, :], fronts.astype(np.double)]
                sav.tofile(fid_ftcp)
                # print progress
                wg.print_progress()
        # read back the temp file once it is closed
        tim_chan_pol = np.fromfile(str(file_ftcp))
    finally:
        # the temp file is removed even if the extraction failed half-way
        if file_ftcp.exists():
            file_ftcp.unlink()
    tim_chan_pol = tim_chan_pol.reshape((int(tim_chan_pol.size / 3), 3))
    sync = {
        'times': tim_chan_pol[:, 0],
        'channels': tim_chan_pol[:, 1],
        'polarities': tim_chan_pol[:, 2]
    }
    if save:
        out_files = alf.io.save_object_npy(output_path, sync, '_spikeglx_sync', parts=parts)
        return Bunch(sync), out_files
    else:
        return Bunch(sync)
def stream(pid, t0, one=None, cache=True, dsets=None):
    """
    Downloads one second of compressed ap data for a probe and returns a Reader on it.
    NB: returned Reader object must be closed after use

    :param pid: Probe UUID
    :param t0: time (secs) at which the downloaded chunk starts
    :param one: An instance of ONE
    :param cache: if True, the sample is looked up in / saved to the `cache/ap` folder of the
     ONE cache directory
    :param dsets: dataset records of the probe insertion; queried from Alyx when None
    :return: tuple (spikeglx.Reader, dsets)
    """
    tlen = 1
    assert one
    if cache:
        samples_folder = Path(one._par.CACHE_DIR).joinpath('cache', 'ap')
    sample_file_name = Path(f"{pid}_{str(int(t0)).zfill(5)}.meta")

    if dsets is None:
        dsets = one.alyx.rest('datasets', 'list', probe_insertion=pid)

    # cache hit: the binary sample sits next to its meta file
    if cache and samples_folder.joinpath(sample_file_name).exists():
        print(f'loading {sample_file_name} from cache')
        cached_bin = samples_folder.joinpath(sample_file_name).with_suffix('.bin')
        sr = spikeglx.Reader(cached_bin, open=True)
        return sr, dsets

    def _ap_dataset(dataset_type):
        # first record of the requested type whose name contains '.ap.'
        return next(dset for dset in dsets
                    if dset['dataset_type'] == dataset_type and '.ap.' in dset['name'])

    dset_ch = _ap_dataset("ephysData.raw.ch")
    dset_meta = _ap_dataset("ephysData.raw.meta")
    dset_cbin = _ap_dataset("ephysData.raw.ap")

    file_ch, file_meta = one.download_datasets([dset_ch, dset_meta])

    first_chunk = int(t0 / CHUNK_DURATION_SECS)
    last_chunk = int((t0 + tlen) / CHUNK_DURATION_SECS) - 1

    sr = one.download_raw_partial(
        url_cbin=dataset_record_to_url(dset_cbin)[0],
        url_ch=file_ch,
        first_chunk=first_chunk,
        last_chunk=last_chunk)

    if cache:
        # save the meta file and the uncompressed sample for the next call
        samples_folder.mkdir(exist_ok=True, parents=True)
        out_meta = samples_folder.joinpath(sample_file_name)
        shutil.copy(sr.file_meta_data, out_meta)
        with open(out_meta.with_suffix('.bin'), 'wb') as fp:
            sr.open()
            sr._raw[:].tofile(fp)

    return sr, dsets
def assert_read_glx(self, tglx):
    """
    Checks that a Reader opened on a mock spikeglx file returns the expected data and sync.

    :param tglx: dictionary describing the mock file; the keys read here are 'bin_file', 'D',
     'ns', 'nc' and 'sync_depth'
    """
    sr = spikeglx.Reader(tglx['bin_file'])
    # expected data: the mock samples scaled by the conversion factor of the file type
    dexpected = sr.channel_conversion_sample2v[sr.type] * tglx['D']
    d, sync = sr.read_samples(0, tglx['ns'])
    # could be rounding errors with non-integer sampling rates
    self.assertTrue(sr.nc == tglx['nc'])
    self.assertTrue(sr.ns == tglx['ns'])
    # test the data reading with gain
    self.assertTrue(np.all(np.isclose(dexpected, d)))
    # test the sync reading, one front per channel
    self.assertTrue(np.sum(sync) == tglx['sync_depth'])
    for m in np.arange(tglx['sync_depth']):
        self.assertTrue(sync[m + 1, m] == 1)
    if sr.type in ['ap', 'lf']:  # exclude nidq from the slicing circus
        # test reading only one channel
        d, _ = sr.read(slice(None), 10)
        self.assertTrue(np.all(np.isclose(d, dexpected[:, 10])))
        # test reading only one time
        d, _ = sr.read(5, slice(None))
        self.assertTrue(np.all(np.isclose(d, dexpected[5, :])))
        # test reading a few times
        d, _ = sr.read(slice(5, 7), slice(None))
        self.assertTrue(np.all(np.isclose(d, dexpected[5:7, :])))
        d, _ = sr.read([5, 6], slice(None))
        self.assertTrue(np.all(np.isclose(d, dexpected[5:7, :])))
        # test reading a few channels
        d, _ = sr.read(slice(None), slice(300, 310))
        self.assertTrue(np.all(np.isclose(d, dexpected[:, 300:310])))
        # test reading a few channels with a numpy array of indices
        ind = np.array([300, 302])
        d, _ = sr.read(slice(None), ind)
        self.assertTrue(np.all(np.isclose(d, dexpected[:, ind])))
        # test double slicing
        d, _ = sr.read(slice(5, 10), slice(300, 310))
        self.assertTrue(np.all(np.isclose(d, dexpected[5:10, 300:310])))
        # test empty slices
        d, _ = sr.read(slice(5, 10), [])
        self.assertTrue(d.size == 0)
        d, _ = sr.read([], [])
        self.assertTrue(d.size == 0)
        d, _ = sr.read([], slice(300, 310))
        self.assertTrue(d.size == 0)
        # no analog sync is expected for ap / lf files
        a = sr.read_sync_analog()
        self.assertIsNone(a)
        # test the read_samples method (should be deprecated ?)
        d, _ = sr.read_samples(0, 500, ind)
        self.assertTrue(np.all(np.isclose(d, dexpected[0:500, ind])))
        d, _ = sr.read_samples(0, 500)
        self.assertTrue(np.all(np.isclose(d, dexpected[0:500, :])))
    else:
        # nidq files: the sync array is expected to have 17 columns
        s = sr.read_sync()
        self.assertTrue(s.shape[1] == 17)
    # remove the temporary directory of the test case
    self.tdir.cleanup()
def test_check_ephys_file(self):
    """Checks the md5 / sha1 hashes of a mock 3B file and the hash verification of the Reader"""
    self.tdir = tempfile.TemporaryDirectory(prefix='glx_test')
    self.addCleanup(self.tdir.cleanup)
    mock_bin_path = Path(self.tdir.name).joinpath('sample3B_g0_t0.imec1.ap.bin')
    meta_path = self.workdir / 'sample3B_g0_t0.imec1.ap.meta'
    bin_3b = spikeglx._mock_spikeglx_file(
        mock_bin_path, meta_path, ns=32, nc=385, sync_depth=16)
    bin_file = bin_3b['bin_file']
    # the mock file content is deterministic: its hashes are known in advance
    self.assertEqual(hashfile.md5(bin_file), "207ba1666b866a091e5bb8b26d19733f")
    self.assertEqual(hashfile.sha1(bin_file), '1bf3219c35dea15409576f6764dd9152c3f8a89c')
    reader = spikeglx.Reader(bin_file)
    self.assertTrue(reader.verify_hash())
def test_compress(self):
    """
    Tests file compression to a copy, then in-place decompression and in-place compression,
    comparing the data read after each step to an untouched reference copy of the file.
    """
    def compare_data(sr0, sr1):
        """Asserts both readers return identical data, through direct indexing and methods"""
        # test direct reading through memmap / mtscompreader
        self.assertTrue(
            np.all(sr0.data[1200:1210, 12] == sr1.data[1200:1210, 12]))
        # test reading through methods
        d0, s0 = sr0.read_samples(1200, 54245)
        d1, s1 = sr1.read_samples(1200, 54245)
        self.assertTrue(np.all(d0 == d1))
        self.assertTrue(np.all(s0 == s1))

    # create a reference file that will serve to compare for inplace operations
    ref_file = self.file_bin.parent.joinpath('REF_' + self.file_bin.name)
    ref_meta = self.file_bin.parent.joinpath(
        'REF_' + self.file_bin.with_suffix('.meta').name)
    shutil.copy(self.file_bin, ref_file)
    shutil.copy(self.file_bin.with_suffix('.meta'), ref_meta)
    sr_ref = spikeglx.Reader(ref_file)

    # test file compression copy
    self.assertFalse(self.sr.is_mtscomp)
    self.file_cbin = self.sr.compress_file()
    self.sc = spikeglx.Reader(self.file_cbin)
    self.assertTrue(self.sc.is_mtscomp)
    compare_data(sr_ref, self.sc)

    # test decompression in-place
    self.sc.decompress_file(keep_original=False, overwrite=True)
    compare_data(sr_ref, self.sc)
    # NOTE(review): this checks `self.sr` (never compressed) rather than `self.sc`, the reader
    # that was just decompressed - confirm which one was intended
    self.assertFalse(self.sr.is_mtscomp)
    self.assertFalse(self.file_cbin.exists())
    compare_data(sr_ref, self.sc)

    # test compression in-place
    self.sc.compress_file(keep_original=False, overwrite=True)
    compare_data(sr_ref, self.sc)
    self.assertTrue(self.sc.is_mtscomp)
    self.assertTrue(self.file_cbin.exists())
    self.assertFalse(self.file_bin.exists())
    compare_data(sr_ref, self.sc)
def assert_read_glx(self, tglx):
    """
    Checks the dimensions, the data and the sync read from a mock spikeglx file.

    :param tglx: dictionary describing the mock file; the keys read here are 'bin_file', 'D',
     'ns', 'nc' and 'sync_depth'
    """
    reader = spikeglx.Reader(tglx['bin_file'])
    data, sync = reader.read_samples(0, tglx['ns'])
    # could be rounding errors with non-integer sampling rates
    self.assertTrue(reader.nc == tglx['nc'])
    self.assertTrue(reader.ns == tglx['ns'])
    # test the data reading with gain
    expected = reader.channel_conversion_sample2mv[reader.type] * tglx['D']
    self.assertTrue(np.all(expected == data))
    # test the sync reading, one front per channel
    n_fronts = np.sum(sync)
    self.assertTrue(n_fronts == tglx['sync_depth'])
    for channel in np.arange(tglx['sync_depth']):
        self.assertTrue(sync[channel + 1, channel] == 1)
    self.tdir.cleanup()
def check_ephys_file(root_path, hash=False):
    """
    Tries to open every spikeglx binary file found under `root_path` and logs the outcome.

    A file that opens (and whose hash verifies, when requested) is logged as PASS at info
    level; any exception is caught and logged as FAILED at error level together with its cause.

    :param root_path: folder passed to `spikeglx.glob_ephys_files`
    :param hash: if True, `Reader.verify_hash` is also run and a mismatch counts as a failure
    :return: None
    """
    root_path = Path(root_path)
    for ef in spikeglx.glob_ephys_files(root_path):
        for lab in ['nidq', 'ap', 'lf']:
            if not ef.get(lab, None):
                continue
            try:
                sr = spikeglx.Reader(ef[lab])
                if hash and not sr.verify_hash():
                    raise ValueError("hashes don't match")
            except Exception as e:
                # best-effort scan: report the cause and carry on with the next file
                _logger.error(f"FAILED {ef[lab]} is corrupt !! ({type(e).__name__}: {e})")
            else:
                _logger.info(f"PASS {ef[lab]}")
def load_lfp(eid, one=None, dataset_types=None):
    """
    From an eid, hits the Alyx database and downloads the standard set of datasets
    needed for LFP

    :param eid: session identifier
    :param one: ONE instance used for the download (required, despite the None default)
    :param dataset_types: additional dataset types to add to the list
    :return: list of spikeglx.Reader, one per lf file found in the session folder
    :raises ValueError: if `one` is not provided
    """
    if one is None:
        raise ValueError("load_lfp requires a ONE instance: pass it with the `one` argument")
    # list() makes a copy and accepts any iterable (a tuple would fail on concatenation)
    dtypes = list(dataset_types or []) + [
        'ephysData.raw.lf', 'ephysData.raw.meta', 'ephysData.raw.ch']
    one.load(eid, dataset_types=dtypes, download_only=True)
    session_path = one.path_from_eid(eid)

    efiles = [ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False)
              if ef.get('lf', None)]
    return [spikeglx.Reader(ef['lf']) for ef in efiles]
def _run(self):
    """
    Compress ephys files looking for `compress_ephys.flag` whithin the probes folder
    Original bin file will be removed
    The registration flag created contains targeted file names at the root of the session

    :return: list of the binary files of the session: already compressed files as they are,
     and the output of `compress_file` for the others
    """
    out_files = []
    for ef in spikeglx.glob_ephys_files(self.session_path):
        # missing types are skipped
        for bin_file in filter(None, map(ef.get, ['ap', 'lf', 'nidq'])):
            sr = spikeglx.Reader(bin_file)
            if not sr.is_mtscomp:
                _logger.info(f"Compressing binary file {bin_file}")
                out_files.append(sr.compress_file(keep_original=False))
            else:
                out_files.append(bin_file)
    return out_files
def extract_rmsmap(fbin, out_folder=None, spectra=True):
    """
    Wrapper for rmsmap that outputs _iblqc_ephysTimeRms and, optionally,
    _iblqc_ephysSpectralDensity ALF objects (suffixed with the upper-cased reader type)

    :param fbin: binary file in spike glx format (will look for attached metatdata)
    :param out_folder: folder in which to store output ALF files. Default uses the folder in which
     the `fbin` file lives.
    :param spectra: whether to compute the power spectrum (only need for lfp data)
    :type: bool
    :return: None
    """
    _logger.info(f"Computing QC for {fbin}")
    sglx = spikeglx.Reader(fbin)
    out_folder = Path(fbin).parent if out_folder is None else Path(out_folder)
    type_suffix = sglx.type.upper()
    alf_object_time = f'_iblqc_ephysTimeRms{type_suffix}'
    alf_object_freq = f'_iblqc_ephysSpectralDensity{type_suffix}'
    # crunch numbers
    rms = rmsmap(fbin, spectra=spectra)
    # output ALF files, single precision
    if not out_folder.exists():
        out_folder.mkdir()
    time_domain = {
        'rms': rms['TRMS'].astype(np.single),
        'timestamps': rms['tscale'].astype(np.single),
    }
    alf.io.save_object_npy(out_folder, object=alf_object_time, dico=time_domain)
    if not spectra:
        return
    freq_domain = {
        'power': rms['spectral_density'].astype(np.single),
        'freqs': rms['fscale'].astype(np.single),
    }
    alf.io.save_object_npy(out_folder, object=alf_object_freq, dico=freq_domain)
def ptp_over_noise(ephys_file, ts, ch, t=2.0, sr=30000, n_ch_probe=385, dtype='int16', offset=0,
                   car=True):
    """
    For specified channels, for specified timestamps, computes the mean (peak-to-peak amplitudes /
    the MADs of the background noise).

    Parameters
    ----------
    ephys_file : string
        The file path to the binary ephys data.
    ts : ndarray_like
        The timestamps (in s) of the spikes.
    ch : ndarray_like
        The channels on which to extract the waveforms.
    t : numeric (optional)
        The time (in ms) of the waveforms to extract to compute the ptp.
    sr : int (optional)
        The sampling rate (in hz) that the ephys data was acquired at.
    n_ch_probe : int (optional)
        The number of channels of the recording.
    dtype: str (optional)
        The datatype represented by the bytes in `ephys_file`.
    offset: int (optional)
        The offset (in bytes) from the start of `ephys_file`.
    car: bool (optional)
        A flag to perform common-average-referencing before extracting waveforms.

    Returns
    -------
    ptp_sigma : ndarray
        An array containing the mean ptp_over_noise values for the specified `ts` and `ch`.

    Examples
    --------
    1) Compute ptp_over_noise for all spikes on 20 channels around the channel of max amplitude
    for unit 1.
        >>> ts = units_b['times']['1']
        >>> max_ch = max_ch = clstrs_b['channels'][1]
        >>> if max_ch < 10:  # take only channels greater than `max_ch`.
        >>>     ch = np.arange(max_ch, max_ch + 20)
        >>> elif (max_ch + 10) > 385:  # take only channels less than `max_ch`.
        >>>     ch = np.arange(max_ch - 20, max_ch)
        >>> else:  # take `n_c_ch` around `max_ch`.
        >>>     ch = np.arange(max_ch - 10, max_ch + 10)
        >>> p = bb.metrics.ptp_over_noise(ephys_file, ts, ch)
    """

    # Ensure `ch` is ndarray
    ch = np.asarray(ch)
    ch = ch.reshape((ch.size, 1)) if ch.size == 1 else ch

    # Get waveforms.
    wf = bb.io.extract_waveforms(ephys_file, ts, ch, t=t, sr=sr, n_ch_probe=n_ch_probe,
                                 dtype=dtype, offset=offset, car=car)

    # Initialize `mean_ptp` based on `ch`, and compute mean ptp of all spikes for each ch.
    mean_ptp = np.zeros((ch.size, ))
    for cur_ch in range(ch.size):
        mean_ptp[cur_ch] = np.mean(
            np.max(wf[:, :, cur_ch], axis=1) - np.min(wf[:, :, cur_ch], axis=1))

    # `median_absolute_deviation` has been removed from recent scipy releases in favour of
    # `median_abs_deviation`; both return the unscaled MAD when called with `scale=1`.
    mad_func = getattr(stats, 'median_abs_deviation', None)
    if mad_func is None:
        mad_func = stats.median_absolute_deviation

    # Compute MAD for `ch` in chunks.
    s_reader = spikeglx.Reader(ephys_file)
    file_m = s_reader.data  # the memmapped array
    n_chunk_samples = int(5e6)  # number of samples per chunk
    n_chunks = np.ceil(file_m.shape[0] / n_chunk_samples).astype('int')
    # Get samples that make up each chunk. e.g. `chunk_sample[1] - chunk_sample[0]` are the
    # samples that make up the first chunk.
    chunk_sample = np.arange(0, file_m.shape[0], n_chunk_samples, dtype=int)
    chunk_sample = np.append(chunk_sample, file_m.shape[0])

    # Compute MAD for each chunk, then take the median MAD of all chunks.
    # (float buffer: an integer buffer would truncate half-integer MAD values)
    mad_chunks = np.zeros((n_chunks, ch.size))
    for chunk in range(n_chunks):
        t0 = time.perf_counter()
        mad_chunks[chunk, :] = mad_func(
            file_m[chunk_sample[chunk]:chunk_sample[chunk + 1], ch], axis=0, scale=1)
        if chunk == 0:
            # Give time estimate for computing MAD, based on the first chunk.
            dt = time.perf_counter() - t0
            print('Performing MAD computation. Estimated time is {:.2f} mins.'
                  ' ({})'.format(dt * n_chunks / 60, time.ctime()))
    print('Done. ({})'.format(time.ctime()))

    # Return `mean_ptp` over `mad`
    mad = np.median(mad_chunks, axis=0)
    ptp_sigma = mean_ptp / mad
    return ptp_sigma
# Script fragment: loads 4 seconds of one benchmark recording and sets up the objects used by
# the examples below.
# Location of the raw benchmark recordings (one sub-folder per pid) and of the sorting outputs
RAW_PATH = Path("/datadisk/Data/spike_sorting/benchmark/raw")
SORT_PATH = Path(
    "/datadisk/team_drives/WG-Neural-Analysis/Spike-Sorting-Analysis/benchmarks"
)
SORTERS = ['ks2', 'ks3', 'pyks2.5']
# NOTE(review): the strings below form a bare tuple expression that is not assigned to
# anything - presumably the remains of a list of pids; confirm and clean up
"8413c5c6-b42b-4ec6-b751-881a54413628", "8ca1a850-26ef-42be-8b28-c2e2d12f06d6", "ce24bbe9-ae70-4659-9e9c-564d1a865de8", "ce397420-3cd2-4a55-8fd1-5e28321981f4",
# Example 1
pid, t0 = ("ce24bbe9-ae70-4659-9e9c-564d1a865de8", 810)
# first `*.ap.bin` file found recursively in the folder of this pid
bin_file = next(RAW_PATH.joinpath(pid).rglob("*.ap.bin"))
sr = spikeglx.Reader(bin_file)
# 4 seconds of data starting at t0 (secs)
sel = slice(int(t0 * sr.fs), int((t0 + 4) * sr.fs))
# drop the last column and transpose
raw = sr[sel, :-1].T
# Example 2: Plot Insertion for a given PID
av = run_needles2.view(lazy=True)
av.add_insertion_by_id(pid)
# Example 3: Show the PSD
fig, ax = plt.subplots()
fig.set_size_inches(14, 7)
show_psd(raw, sr.fs, ax=ax)
# Example 4: Display the raw / pre-proc
h = neuropixel.trace_header()
# NOTE(review): without the `fs` argument, `butter` expects the cutoff as a fraction of the
# Nyquist frequency; `300 / sr.fs / 2` then corresponds to 75 Hz rather than 300 Hz - confirm
sos = scipy.signal.butter(3, 300 / sr.fs / 2, btype='highpass', output='sos')
# Script fragment: picks one sample (a cached file or a pid), opens a Reader on it and
# high-pass filters the raw data.
file_ind = np.random.randint(len(files_samples))
# the random draw above is overridden by a fixed index
file_ind = 739  # very good quality spike sorting
print(file_ind, files_samples[file_ind])
pid, t0 = ('47da98a8-f282-4830-92c2-af0e1d4f00e2', 1425.)
# `pid` is overridden by the selected entry of `files_samples`
pid = files_samples[file_ind]
# pid, t0 = ("01c6065e-eb3c-49ba-9c25-c1f17b18d529", 500)
if isinstance(pid, Path):
    # cached sample file: pid and t0 are encoded in the file stem as `<pid>_<t0>`
    file_sample = pid
    pid, t0 = file_sample.stem.split('_')
    t0 = float(t0)
    sr = spikeglx.Reader(file_sample)
    dsets = one.alyx.rest('datasets', 'list', probe_insertion=pid)
else:
    sr, dsets = stream(pid, t0, one=one, samples_folder=folder_samples)
# plot_insertion(pid, one)
h = neuropixel.trace_header()
# drop the last column and transpose
raw = sr[:, :-1].T
# NOTE(review): without the `fs` argument, `butter` expects the cutoff as a fraction of the
# Nyquist frequency; `300 / sr.fs / 2` then corresponds to 75 Hz rather than 300 Hz - confirm
sos = scipy.signal.butter(3, 300 / sr.fs / 2, btype='highpass', output='sos')
# zero-phase (forward-backward) filtering
butt = scipy.signal.sosfiltfilt(sos, raw)
# show_psd(butt, sr.fs)
# 739 /datadisk/Data/spike_sorting/short_samples/f03b61b4-6b13-479d-940f-d1608eb275cc_04210.bin: Autre example de layering ou les charactéristiques spectrales / spatiales sont très différentes. Spikes alignés # 830 /datadisk/Data/spike_sorting/short_samples/b02c0ce6-2436-4fc0-9ea0-e7083a387d7e_03010.bin, très mauvaise qualité - spikes sont décalés ?!? file_ind = np.random.randint(len(files_samples)) file_ind = 739 # very good quality spike sorting print(file_ind, files_samples[file_ind]) pid, t0 = ('47da98a8-f282-4830-92c2-af0e1d4f00e2', 1425.) pid = files_samples[file_ind] # pid, t0 = ("01c6065e-eb3c-49ba-9c25-c1f17b18d529", 500) if isinstance(pid, Path): file_sample = pid pid, t0 = file_sample.stem.split('_') t0 = float(t0) sr = spikeglx.Reader(file_sample, open=True) dsets = one.alyx.rest('datasets', 'list', probe_insertion=pid) else: sr, dsets = stream(pid, t0, one=one, samples_folder=folder_samples) # plot_insertion(pid, one) h = neuropixel.trace_header() raw = sr[:, :-1].T sos = scipy.signal.butter(3, 300 / sr.fs / 2, btype='highpass', output='sos') butt = scipy.signal.sosfiltfilt(sos, raw) # show_psd(butt, sr.fs) fk_kwargs = {
def extract_waveforms(ephys_file, ts, ch, t=2.0, sr=30000, n_ch_probe=385, dtype='int16',
                      offset=0, car=True):
    """
    Extracts spike waveforms from binary ephys data file, after (optionally)
    common-average-referencing (CAR) spatial noise.

    Arguments are validated before the file is opened, so invalid channels or
    `car=True` fail without any I/O, and an empty `ts` returns an empty array.

    Parameters
    ----------
    ephys_file : string
        The file path to the binary ephys data.
    ts : ndarray_like
        The timestamps (in s) of the spikes.
    ch : ndarray_like
        The channels on which to extract the waveforms. Valid channel numbers are
        `0 .. n_ch_probe - 1`.
    t : numeric (optional)
        The time (in ms) of each returned waveform.
    sr : int (optional)
        The sampling rate (in Hz) that the ephys data was acquired at.
    n_ch_probe : int (optional)
        The number of channels of the recording.
    dtype: str (optional)
        The datatype represented by the bytes in `ephys_file`. Kept for backward
        compatibility; unused now that the file is read through `spikeglx.Reader`.
    offset: int (optional)
        The offset (in bytes) from the start of `ephys_file`. Kept for backward
        compatibility; unused now that the file is read through `spikeglx.Reader`.
    car: bool (optional)
        A flag to perform CAR before extracting waveforms. CAR is currently not
        implemented, so the default value raises `NotImplementedError`.

    Returns
    -------
    waveforms : ndarray
        An array of shape (#spikes, #samples, #channels) containing the waveforms.

    Raises
    ------
    ValueError
        If any channel in `ch` is negative or not smaller than `n_ch_probe`.
    NotImplementedError
        If `car` is True.

    Notes
    -----
    Each waveform is the slice `[spike - n, spike + n)` of the reader's data, with
    `n = int(sr / 1000 * t / 2)`. Spikes closer than `n` samples to either end of
    the recording are not handled specially (presumably the slice comes back short
    and the reshape fails -- TODO confirm against `spikeglx.Reader.data`).

    Examples
    --------
    1) Extract all the waveforms for unit1 with and without CAR.
        >>> import numpy as np
        >>> import brainbox as bb
        >>> import alf.io as aio
        >>> import ibllib.ephys.spikes as e_spks
        (*Note, if there is no 'alf' directory, make 'alf' directory from 'ks2' output directory):
        >>> e_spks.ks2_to_alf(path_to_ks_out, path_to_alf_out)
        # Get a clusters bunch and a units bunch from a spikes bunch from an alf directory.
        >>> clstrs_b = aio.load_object(path_to_alf_out, 'clusters')
        >>> spks_b = aio.load_object(path_to_alf_out, 'spikes')
        >>> units_b = bb.processing.get_units_bunch(spks_b, ['times'])
        # Get the timestamps and 20 channels around the max amp channel for unit1, and extract the
        # two sets of waveforms.
        >>> ts = units_b['times']['1']
        >>> max_ch = clstrs_b['channels'][1]
        >>> if max_ch < 10:  # take only channels greater than `max_ch`.
        >>>     ch = np.arange(max_ch, max_ch + 20)
        >>> elif (max_ch + 10) > 385:  # take only channels less than `max_ch`.
        >>>     ch = np.arange(max_ch - 20, max_ch)
        >>> else:  # take `n_c_ch` around `max_ch`.
        >>>     ch = np.arange(max_ch - 10, max_ch + 10)
        >>> wf = bb.io.extract_waveforms(path_to_ephys_file, ts, ch, car=False)
        >>> wf_car = bb.io.extract_waveforms(path_to_ephys_file, ts, ch, car=True)
    """
    # Exception handling for impossible channels. Channel indices are zero-based, so
    # `n_ch_probe` itself is already out of range.
    ch = np.asarray(ch).ravel()
    if np.any(ch < 0) or np.any(ch >= n_ch_probe):
        raise ValueError(
            'At least one specified channel number is impossible. The minimum channel'
            ' number was {}, and the maximum channel number was {}. Check specified'
            ' channel numbers and try again.'.format(np.min(ch), np.max(ch)))

    if car:
        # compute spatial noise in chunks
        # see https://github.com/int-brain-lab/iblenv/issues/5
        raise NotImplementedError("CAR option is not available")

    # Number of samples to return on each side of a timestamp (`np.int` no longer exists
    # in recent NumPy, hence the builtin).
    n_wf_samples = int(sr / 1000 * (t / 2))
    n_window = 2 * n_wf_samples
    # Convert to an array first: multiplying a plain list by `sr` would repeat it.
    ts_samples = (np.asarray(ts, dtype=float).ravel() * sr).astype(int)
    n_spikes = ts_samples.size

    waveforms = np.zeros((n_spikes, n_window, ch.size))
    if n_spikes == 0:
        return waveforms

    # Get memmapped array of `ephys_file`
    s_reader = spikeglx.Reader(ephys_file)
    file_m = s_reader.data  # the memmapped array

    def _read_window(center):
        # Samples [center - n_wf_samples, center + n_wf_samples) on the requested channels.
        start = center - n_wf_samples
        return file_m[start:start + n_window, ch].reshape((n_window, ch.size))

    # Time the first (up to 5) real spikes to estimate the total duration, instead of
    # probing extra windows that may not even lie inside the recording.
    n_timed = min(5, n_spikes)
    t0 = time.perf_counter()
    for spk in range(n_timed):
        waveforms[spk] = _read_window(ts_samples[spk])
    dt = time.perf_counter() - t0
    print('Performing waveform extraction. Estimated time is {:.2f} mins. ({})'.
          format(dt * n_spikes / 60 / n_timed, time.ctime()))

    for spk in range(n_timed, n_spikes):
        waveforms[spk] = _read_window(ts_samples[spk])
    print('Done. ({})'.format(time.ctime()))
    return waveforms
from matplotlib.gridspec import GridSpec
from oneibl.one import ONE
from ibllib.io import spikeglx

# Download data
one = ONE()
eid = one.search(subject='ZM_2240', date_range=['2020-01-23', '2020-01-23'])
# `download_only=True` is passed and the result is used below as a list of file paths.
lf_paths = one.load(eid[0], dataset_types=[
    'ephysData.raw.lf', 'ephysData.raw.meta', 'ephysData.raw.ch'
], download_only=True)

# Read in raw LFP data from probe00
raw = spikeglx.Reader(lf_paths[0])
# First 100000 samples, all channels; `[0]` keeps the first element returned by `read`.
signal = raw.read(nsel=slice(None, 100000, None),
                  csel=slice(None, None, None))[0]
# NOTE(review): the key is named `channel_conversion_sample2v` (samples to volts)
# while the comment says uV; also check whether `read` already returns scaled
# data, in which case this would scale twice -- confirm.
signal = signal * raw.channel_conversion_sample2v[
    'lf']  # Convert samples into uV
signal = np.rot90(signal)  # rotate so that rows index channels for the calls below
ts = one.load(eid[0], 'ephysData.raw.timestamps')

# %% Calculate power spectrum and coherence between two random channels
ps_freqs, ps = bb.lfp.power_spectrum(signal, fs=raw.fs)
# NOTE(review): `np.random.choice` samples with replacement by default, so the
# same channel can be drawn twice -- confirm whether that is acceptable here.
random_ch = np.random.choice(raw.nc, 2)
coh_freqs, coh, phase_lag = bb.lfp.coherence(signal[random_ch[0], :],
                                             signal[random_ch[1], :],
                                             fs=raw.fs)
from oneibl.one import ONE

one = ONE()
# Get a specific session eID
eid = one.search(subject='ZM_2240', date_range='2020-01-22')[0]

# Define and load dataset types of interest
dtypes = ['ephysData.raw.lf', 'ephysData.raw.meta', 'ephysData.raw.ch']
one.load(eid, dataset_types=dtypes, download_only=True)

# Get the files information
session_path = one.path_from_eid(eid)
# Keep only the ephys file entries that have an 'lf' item.
efiles = [ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False)
          if ef.get('lf', None)]

# Read the files and get the data
lfreader = spikeglx.Reader(efiles[0]['lf'])

# -- Option 1 -- Get the data directly in Volts
dat_volt = lfreader.read(nsel=slice(0, 1000, None))

# -- Option 2 -- Get the data in samples
dat_samp = lfreader.data[:10000, :]

# Get the conversion factor and check it matches
s2mv = lfreader.channel_conversion_sample2v['lf'][0]  # Convert sample to Volts

# NOTE(review): the body below is a bare `ValueError` name, which is a no-op
# expression; presumably a `raise` on mismatch was intended, and an exact `==`
# on scaled values may be fragile -- confirm intent.
if lfreader._raw[55, 5] * s2mv == lfreader[55, 5]:
    # TODO OLIVIER CHECK TEST
    ValueError
def amp_heatmap(ephys_file, ts, ch, sr=30000, n_ch_probe=385, dtype='int16', cmap_name='RdBu',
                car=True, ax=None):
    '''
    Plots a heatmap of the normalized voltage values over time and space for given timestamps and
    channels, after (optionally) common-average-referencing.

    Parameters
    ----------
    ephys_file : string
        The file path to the binary ephys data.
    ts: array_like
        A set of timestamps (in s) for which to get the voltage values. Must not be empty.
    ch : array-like
        The channels to use for extracting the voltage values. Must not be empty.
    sr : int (optional)
        The sampling rate (in Hz) that the ephys data was acquired at.
    n_ch_probe : int (optional)
        The number of channels of the recording. Currently unused.
    dtype: str (optional)
        The datatype represented by the bytes in `ephys_file`. Currently unused.
    cmap_name : string (optional)
        The name of the colormap associated with the plot.
    car: bool (optional)
        A flag for whether or not to subtract a per-channel noise estimate before plotting.
        The estimate is the median over chunks of the per-chunk temporal median, taken
        between the earliest and latest requested sample. It is skipped when all
        timestamps map to the same sample, since there is then no span to estimate from.
    ax : axessubplot (optional)
        The axis handle to plot the histogram on. (if `None`, a new figure and axis is created)

    Returns
    -------
    v_vals : ndarray
        The values read at each timestamp, shape (#timestamps, #channels), after the optional
        noise subtraction. These are *not* normalized; only the plotted image is scaled by the
        largest absolute value.

    Raises
    ------
    ValueError
        If `ts` or `ch` is empty.

    Examples
    --------
    1) Plot a heatmap of the spike amplitudes across 20 channels around the channel of max
    amplitude for all spikes in unit 1.
        >>> ts = units_b['times']['1']
        >>> max_ch = clstrs_b['channels'][1]
        >>> n_c_ch = 10
        >>> if max_ch < n_c_ch:  # take only channels greater than `max_ch`.
        >>>     ch = np.arange(max_ch, max_ch + 20)
        >>> elif (max_ch + n_c_ch) > n_ch_probe:  # take only channels less than `max_ch`.
        >>>     ch = np.arange(max_ch - 20, max_ch)
        >>> else:  # take `n_c_ch` around `max_ch`.
        >>>     ch = np.arange(max_ch - 10, max_ch + 10)
        >>> bb.plot.amp_heatmap(path_to_ephys_file, ts, ch)
    '''
    # Ensure `ts` and `ch` are flat ndarrays (a plain list times `sr` would repeat the list,
    # and a scalar channel must still index as a 1-element array).
    ts = np.asarray(ts).ravel()
    ch = np.asarray(ch).ravel()
    if ts.size == 0:
        raise ValueError('`ts` must contain at least one timestamp.')
    if ch.size == 0:
        raise ValueError('`ch` must contain at least one channel.')

    # Get memmapped array of `ephys_file`
    s_reader = spikeglx.Reader(ephys_file)
    file_m = s_reader.data

    # Get voltage values for each peak amplitude sample for `ch`.
    max_amp_samples = (ts * sr).astype(int)
    # Currently this is an annoying way to calculate `v_vals` b/c indexing with multiple values
    # is currently unsupported.
    v_vals = np.zeros((max_amp_samples.size, ch.size))
    for row, sample in enumerate(max_amp_samples):
        v_vals[row] = file_m[sample:sample + 1, ch]

    # Use the min/max so that the span is well defined even if `ts` is not sorted.
    first_sample, last_sample = max_amp_samples.min(), max_amp_samples.max()
    if car and last_sample > first_sample:
        # Compute spatial noise in chunks, and subtract from `v_vals`.
        n_chunk_samples = int(5e6)  # number of samples per chunk
        n_chunks = int(np.ceil((last_sample - first_sample) / n_chunk_samples))
        # Get samples that make up each chunk. e.g. `chunk_sample[1] - chunk_sample[0]` are the
        # samples that make up the first chunk.
        chunk_sample = np.arange(first_sample, last_sample, n_chunk_samples, dtype=int)
        chunk_sample = np.append(chunk_sample, last_sample)
        noise_s_chunks = np.zeros((n_chunks, ch.size), dtype=np.int16)  # spatial noise array
        # Compute noise for each chunk, then take the median noise of all chunks. The first
        # chunk is timed to give a duration estimate, rather than reading it twice.
        for chunk in range(n_chunks):
            t0 = time.perf_counter()
            noise_s_chunks[chunk, :] = np.median(
                file_m[chunk_sample[chunk]:chunk_sample[chunk + 1], ch], axis=0)
            if chunk == 0:
                dt = time.perf_counter() - t0
                print('Performing spatial CAR before waveform extraction. Estimated time is '
                      '{:.2f} mins. ({})'.format(dt * n_chunks / 60, time.ctime()))
        noise_s = np.median(noise_s_chunks, axis=0)
        v_vals -= noise_s[None, :]
        print('Done. ({})'.format(time.ctime()))

    # Plot heatmap.
    if ax is None:
        _, ax = plt.subplots()
    peak = np.max(np.abs(v_vals))
    # An all-zero signal is plotted as zeros instead of dividing by zero.
    v_vals_norm = (v_vals / peak).T if peak else v_vals.T
    cbar_map = ax.imshow(v_vals_norm, cmap=cmap_name, aspect='auto',
                         extent=[ts[0], ts[-1], ch[0], ch[-1]], origin='lower')
    ax.set_yticks(np.arange(ch[0], ch[-1], 5))
    ax.set_ylabel('Channel Numbers')
    ax.set_xlabel('Time (s)')
    ax.set_title('Voltage Heatmap')
    fig = ax.figure
    cbar = fig.colorbar(cbar_map, ax=ax)
    cbar.set_label('V', rotation=-90)

    return v_vals
one.load(eid, dataset_types=dtypes, download_only=True) # Get the files information session_path = one.path_from_eid(eid) efiles = [ ef for ef in spikeglx.glob_ephys_files(session_path, bin_exists=False) if ef.get('lf', None) ] efile = efiles[0]['lf'] # === Option 2 === You can also input a file locally, e.g. # efile = ('/datadisk/FlatIron/churchlandlab/Subjects/CSHL045/2020-02-26/001/' # 'raw_ephys_data/probe01/_spikeglx_ephysData_g0_t0.imec.lf.cbin') # === Read the files and get the data === sr = spikeglx.Reader(efile) sync_file = sr.file_bin.parent.joinpath( sr.file_bin.stem.replace('.lf', '.sync.npy')) sync = np.load(sync_file) sample2time = scipy.interpolate.interp1d(sync[:, 0] * sr.fs, sync[:, 1]) # Read and plot chunk of data data = sr[105000:109000, :-1] data = data - np.mean(data) tscale = sample2time(np.array([105000, 109000])) plt.figure() im = plt.imshow(data.transpose(), aspect='auto', extent=[*tscale, data.shape[1], 0])