def _mapping_from_regions_list(self, new_map, lateralize=False):
    """
    From a vector of region ids, creates a mapping such that
    newids = self.mapping
    :param new_map: np.array: vector of region ids
    :param lateralize: boolean, if True the mapping keeps the hemisphere sign of the ids
    """
    I_ROOT = 1
    I_VOID = 0
    # to lateralize we make sure all regions are represented in + and -
    new_map = np.unique(np.r_[-new_map, new_map])
    assert np.all(np.isin(new_map, self.id)), \
        "All mapping ids should be represented in the Allen ids"
    # with the lateralization, self.id may have duplicate values so ismember is necessary
    iid, inm = ismember(self.id, new_map)
    iid = np.where(iid)[0]
    mapind = np.zeros_like(self.id) + I_ROOT  # non-assigned regions default to root
    mapind[iid] = iid  # regions present in the list keep their own index
    # starting from the higher levels of the hierarchy, assign all descendants to the mapping
    for i in np.argsort(self.level[iid]):
        descendants = self.descendants(self.id[iid[i]]).id
        _, idesc, _ = np.intersect1d(self.id, descendants, return_indices=True)
        mapind[idesc] = iid[i]
    mapind[0] = I_VOID  # void stays void
    # to delateralize the regions, assign the positive index to all mapind elements
    if lateralize is False:
        _, iregion = ismember(np.abs(self.id), self.id)
        mapind = mapind[iregion]
    return mapind
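# Hedged usage sketch (not in the source): the returned `mapind` is an index vector
# into self.id, typically applied to a volume of annotation indices to remap them
# onto a coarser parcellation. Here everything is mapped onto the root node
# (id 997 in the Allen ontology); the voxel indices below are illustrative only.
import numpy as np
from ibllib.atlas import BrainRegions

regions = BrainRegions()
mapind = regions._mapping_from_regions_list(np.array([997]), lateralize=False)
voxel_index = np.array([0, 1, 42])              # hypothetical annotation volume indices
remapped_ids = regions.id[mapind[voxel_index]]  # region id of each voxel after remapping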
def multiple_spike_trains(firing_rates=None, rec_len_secs=1000, cluster_ids=None,
                          amplitude_noise=20 * 1e-6):
    """
    :param firing_rates: list or np.array of firing rates (spikes per second)
    :param rec_len_secs: recording length in seconds
    :param cluster_ids: (optional) array of cluster ids, defaults to np.arange(n_units)
    :param amplitude_noise: (optional) standard deviation of the amplitude noise, in V
    :return: spike_times, spike_amps, spike_clusters
    """
    if firing_rates is None:
        firing_rates = np.random.randint(150, 600, 10)
    if cluster_ids is None:
        cluster_ids = np.arange(firing_rates.size)
    ca = np.exp(np.random.normal(5.5, 0.5, firing_rates.size)) / 1e6  # output is in V
    st = np.empty(0)
    sc = np.empty(0)
    for i, firing_rate in enumerate(firing_rates):
        t = generate_spike_train(firing_rate=firing_rate, rec_len_secs=rec_len_secs)
        st = np.r_[st, t]
        sc = np.r_[sc, np.zeros(t.size, dtype=np.int32) + cluster_ids[i]]
    ordre = st.argsort()
    st = st[ordre]
    sc = np.int32(sc[ordre])
    _, isc = ismember(sc, cluster_ids)  # cluster ids may be arbitrary: re-index
    sa = np.maximum(ca[isc] + np.random.randn(st.size) * amplitude_noise, 25 * 1e-6)
    return st, sa, sc
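# Hedged usage sketch (not in the source): three synthetic units with known rates;
# the three output arrays are time-sorted and share one length.
st, sa, sc = multiple_spike_trains(firing_rates=np.array([5, 20, 50]),
                                   rec_len_secs=100, cluster_ids=np.array([7, 9, 11]))
assert st.size == sa.size == sc.size
assert np.all(np.diff(st) >= 0)          # spike times come out sorted
assert set(np.unique(sc)) <= {7, 9, 11}  # only the requested cluster ids appear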
def test_clusters_metrics():
    np.random.seed(54)
    rec_length = 1000
    frs = np.array([3, 100, 80, 40])  # firing rates
    cid = [0, 1, 3, 4]  # non-contiguous ids so that one of the clusters has no spike
    t, a, c = multiple_spike_trains(firing_rates=frs, rec_len_secs=rec_length, cluster_ids=cid)
    d = np.sin(2 * np.pi * c / rec_length * t) * 100  # sinusoidal drift whose frequency scales with the cluster id

    def _assertions(dfm, idf, target_cid):
        # dfm: qc dataframe, idf: indices of existing clusters in dfm, target_cid: expected cluster ids
        assert np.allclose(dfm['amp_median'][idf] / np.exp(5.5) * 1e6, 1, rtol=1.1)
        assert np.allclose(dfm['amp_std_dB'][idf] / 20 * np.log10(np.exp(0.5)), 1, rtol=1.1)
        assert np.allclose(dfm['drift'][idf], np.array(cid) * 100 * 4 * 3.6, rtol=1.1)
        assert np.allclose(dfm['firing_rate'][idf], frs, rtol=1.1)
        assert np.allclose(dfm['cluster_id'], target_cid)

    # check with missing clusters
    dfm = quick_unit_metrics(c, t, a, d, cluster_ids=np.arange(5), tbounds=[100, 900])
    idf, _ = ismember(np.arange(5), cid)
    _assertions(dfm, idf, np.arange(5))
def test_uuids_intersections(self):
    ntotal = 500
    nsub = 17
    nadd = 3

    eids = uuid2np([uuid.uuid4() for _ in range(ntotal)])

    np.random.seed(42)
    isel = np.floor(np.argsort(np.random.random(nsub)) / nsub * ntotal).astype(np.int16)
    sids = np.r_[eids[isel, :], uuid2np([uuid.uuid4() for _ in range(nadd)])]
    np.random.shuffle(sids)

    # check the intersection
    v, i0, i1 = intersect2d(eids, sids)
    assert np.all(eids[i0, :] == sids[i1, :])
    assert np.all(np.sort(isel) == np.sort(i0))

    v_, i0_, i1_ = np.intersect1d(eids[:, 0], sids[:, 0], return_indices=True)
    assert np.setxor1d(v_, v[:, 0]).size == 0
    assert np.setxor1d(i0, i0_).size == 0
    assert np.setxor1d(i1, i1_).size == 0

    for a, b in zip(ismember2d(sids, eids), ismember(sids[:, 0], eids[:, 0])):
        assert np.all(a == b)

    # check conversion to numpy back and forth
    uuids = [uuid.uuid4() for _ in np.arange(4)]
    np_uuids = uuid2np(uuids)
    assert np2uuid(np_uuids) == uuids
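# A hedged sketch of the packing that uuid2np/np2uuid round-trip above relies on:
# a 128-bit UUID maps onto two int64 words. The exact byte order is an
# implementation detail of the library; this is an illustration, not its code.
import uuid
import numpy as np

def uuid2np_sketch(uuids):
    return np.array([np.frombuffer(u.bytes, dtype=np.int64) for u in uuids])

def np2uuid_sketch(arr):
    return [uuid.UUID(bytes=row.tobytes()) for row in arr]

u = [uuid.uuid4() for _ in range(3)]
assert np2uuid_sketch(uuid2np_sketch(u)) == u  # lossless round trip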
def remap(ids, source='Allen', dest='Beryl', output='acronym'):
    br = BrainRegions()
    _, inds = ismember(ids, br.id[br.mappings[source]])
    ids = br.id[br.mappings[dest][inds]]
    if output == 'id':
        return ids
    elif output == 'acronym':
        return br.get(ids)['acronym']
    else:
        raise ValueError("output should be either 'id' or 'acronym'")
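# Hedged usage sketch (997 is the Allen root id; any valid Allen ids work here):
beryl_acronyms = remap(np.array([997]), source='Allen', dest='Beryl', output='acronym')
beryl_ids = remap(np.array([997]), source='Allen', dest='Beryl', output='id')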
def _make_dataclass_offline(self, eid, dataset_types=None, cache_dir=None, **kwargs):
    if self._cache.size == 0:
        return SessionDataInfo()
    # select the session
    npeid = parquet.str2np(eid)[0]
    df = self._cache[self._cache['eid_0'] == npeid[0]]
    df = df[df['eid_1'] == npeid[1]]
    # select datasets
    df = df[ismember(df['dataset_type'], dataset_types)[0]]
    return SessionDataInfo.from_pandas(df, self._get_cache_dir(cache_dir))
def _navigate_tree(self, ids, direction='down'):
    """
    Private method to navigate the tree and get all related objects either up or down
    :param ids: array of region ids to start the search from
    :param direction: 'down' to get all descendants, 'up' to get all ancestors
    :return: Bunch of the matching regions
    """
    indices = ismember(self.id, ids)[0]
    count = np.sum(indices)
    while True:
        if direction == 'down':
            indices |= ismember(self.parent, self.id[indices])[0]
        elif direction == 'up':
            indices |= ismember(self.id, self.parent[indices])[0]
        else:
            raise ValueError("direction should be either 'up' or 'down'")
        if count == np.sum(indices):  # last iteration didn't find any match
            break
        else:
            count = np.sum(indices)
    return self.get(self.id[indices])
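# Hedged usage sketch (315 is assumed here to be the Isocortex id in the Allen
# ontology; the method is private but its public wrappers behave the same way):
regions = BrainRegions()
cortex_descendants = regions._navigate_tree(np.array([315]), direction='down')
cortex_ancestors = regions._navigate_tree(np.array([315]), direction='up')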
def find_traj_is_best(self, provenance='Histology track'):
    val = PROV_2_VAL[provenance]
    next_provenance = VAL_2_PROV[val + 20]

    if 'traj' not in self.traj[provenance].keys():
        self.get_traj_for_provenance(provenance)
    if 'traj' not in self.traj[next_provenance].keys():
        self.get_traj_for_provenance(next_provenance)

    isin, _ = ismember(self.traj[provenance]['ins'],
                       self.traj[next_provenance]['ins'])
    self.traj[provenance]['is_best'] = np.where(np.invert(isin))[0]

    # special exception for planned provenance: also exclude insertions that
    # already have a provenance two levels further up
    if provenance == 'Planned':
        next_provenance = VAL_2_PROV[val + 40]
        if 'traj' not in self.traj[next_provenance].keys():
            self.get_traj_for_provenance(next_provenance)
        isin, _ = ismember(self.traj[provenance]['ins'][self.traj[provenance]['is_best']],
                           self.traj[next_provenance]['ins'])
        self.traj[provenance]['is_best'] = (self.traj[provenance]['is_best']
                                            [np.where(np.invert(isin))[0]])
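# For orientation, a hedged sketch of the provenance ranking the +20/+40 offsets
# above rely on. The real values live in PROV_2_VAL / VAL_2_PROV; the dict below
# is an assumption for illustration only.
PROV_2_VAL_SKETCH = {'Planned': 10, 'Micro-manipulator': 30,
                     'Histology track': 50, 'Ephys aligned histology track': 70}
VAL_2_PROV_SKETCH = {v: k for k, v in PROV_2_VAL_SKETCH.items()}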
def test_ismember2d_uuids(self):
    nb = 20
    na = 500
    np.random.seed(42)
    a = np.random.randint(0, nb + 3, na)
    b = np.arange(nb)
    lia, locb = bnum.ismember(a, b)
    bb = np.random.randint(low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max,
                           size=(nb, 2), dtype=np.int64)
    aa = np.zeros((na, 2), dtype=np.int64)
    aa[lia, :] = bb[locb, :]
    lia_, locb_ = bnum.ismember2d(aa, bb)
    assert np.all(lia == lia_) & np.all(locb == locb_)
    # if the first column is equal, the distinction is to be made on the second
    bb[:, 0] = 0
    aa[:, 0] = 0
    assert np.unique(bb[:, 1]).size == nb
    lia_, locb_ = bnum.ismember2d(aa, bb)
    assert np.all(lia == lia_) & np.all(locb == locb_)
def remap(ids, source='Allen', dest='Beryl'):
    # `br` is assumed to be a module-level BrainRegions instance
    _, inds = ismember(ids, br.id[br.mappings[source]])
    return br.id[br.mappings[dest][inds]]
def __init__(self, res_um=25, brainmap='Allen', scaling=np.array([1, 1, 1]),
             mock=False, hist_path=None):
    """
    :param res_um: 10, 25 or 50 um
    :param brainmap: defaults to 'Allen', see ibllib.atlas.BrainRegion for re-mappings
    :param scaling: scale factor along ml, ap, dv for squeeze and stretch ([1, 1, 1])
    :param mock: for testing purpose
    :param hist_path: optional path to a histology image volume
    :return: atlas.BrainAtlas
    """
    par = params.read('one_params')
    FLAT_IRON_ATLAS_REL_PATH = Path('histology', 'ATLAS', 'Needles', 'Allen')
    LUT_VERSION = "v01"  # version 01 is the lateralized version
    regions = BrainRegions()
    xyz2dims = np.array([1, 0, 2])  # this is the c-contiguous ordering
    dims2xyz = np.array([1, 0, 2])
    # we use Bregma as the origin
    self.res_um = res_um
    ibregma = (ALLEN_CCF_LANDMARKS_MLAPDV_UM['bregma'] / self.res_um)
    dxyz = self.res_um * 1e-6 * np.array([1, -1, -1]) * scaling
    if mock:
        image, label = [np.zeros((528, 456, 320), dtype=np.int16) for _ in range(2)]
        label[:, :, 100:105] = 1327  # lookup index for retina, id 304325711 (no id 1327)
    else:
        path_atlas = Path(par.CACHE_DIR).joinpath(FLAT_IRON_ATLAS_REL_PATH)
        file_image = hist_path or path_atlas.joinpath(f'average_template_{res_um}.nrrd')
        # get the image volume
        if not file_image.exists():
            _download_atlas_flatiron(file_image, FLAT_IRON_ATLAS_REL_PATH, par)
        # get the remapped label volume
        file_label = path_atlas.joinpath(f'annotation_{res_um}.nrrd')
        if not file_label.exists():
            _download_atlas_flatiron(file_label, FLAT_IRON_ATLAS_REL_PATH, par)
        file_label_remap = path_atlas.joinpath(f'annotation_{res_um}_lut_{LUT_VERSION}.npz')
        if not file_label_remap.exists():
            label = self._read_volume(file_label)
            _logger.info("computing brain atlas annotations lookup table")
            # lateralize atlas: for this the regions of the left hemisphere have
            # primary keys opposite to the normal ones
            lateral = np.zeros(label.shape[xyz2dims[0]])
            lateral[int(np.floor(ibregma[0]))] = 1
            lateral = np.sign(np.cumsum(lateral)[np.newaxis, :, np.newaxis] - 0.5)
            label = label * lateral
            _, im = ismember(label, regions.id)
            label = np.reshape(im.astype(np.uint16), label.shape)
            _logger.info(f"saving {file_label_remap} ...")
            np.savez_compressed(file_label_remap, label)
        # loads the files
        label = self._read_volume(file_label_remap)
        image = self._read_volume(file_image)
    super().__init__(image, label, dxyz, regions, ibregma,
                     dims2xyz=dims2xyz, xyz2dims=xyz2dims)
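# Hedged usage sketch, assuming this constructor belongs to ibllib.atlas.AllenAtlas.
# The image and label volumes are downloaded on first use; `mock=True` builds
# empty volumes instead and avoids any download.
from ibllib.atlas import AllenAtlas
ba = AllenAtlas(res_um=25, mock=True)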
def quick_unit_metrics(spike_clusters, spike_times, spike_amps, spike_depths,
                       params=METRICS_PARAMS, cluster_ids=None, tbounds=None):
    """
    Computes single unit metrics from only the spike times, amplitudes, and
    depths for a set of units.

    Metrics computed:
        'amp_max', 'amp_min', 'amp_median', 'amp_std_dB', 'contamination',
        'contamination_alt', 'drift', 'firing_rate', 'missed_spikes_est',
        'noise_cutoff', 'presence_ratio', 'presence_ratio_std',
        'slidingRP_viol', 'spike_count'

    Parameters (see the METRICS_PARAMS constant)
    ----------
    spike_clusters : ndarray_like
        A vector of the unit ids for a set of spikes.
    spike_times : ndarray_like
        A vector of the timestamps for a set of spikes.
    spike_amps : ndarray_like
        A vector of the amplitudes for a set of spikes.
    spike_depths : ndarray_like
        A vector of the depths for a set of spikes.
    cluster_ids : (optional) list of cluster ids. If not all clusters are represented
        in spike_clusters (i.e. a cluster has no spike), this ensures the output size
        is consistent with the input arrays.
    tbounds : (optional) list or 2-element array containing a time selection to
        perform the metrics computation on.
    params : dict (optional)
        Parameters used for computing some of the metrics in the function:
            'presence_window': float
                The time window (in s) used to look for spikes when computing the
                presence ratio.
            'refractory_period': float
                The refractory period used when computing isi violations and the
                contamination estimate.
            'min_isi': float
                The minimum interspike-interval (in s) for counting duplicate spikes
                when computing the contamination estimate.
            'spks_per_bin_for_missed_spks_est': int
                The number of spikes per bin used to compute the spike amplitude pdf
                for a unit, when computing the missed spikes estimate.
            'std_smoothing_kernel_for_missed_spks_est': float
                The standard deviation for the gaussian kernel used to compute the
                spike amplitude pdf for a unit, when computing the missed spikes
                estimate.
            'min_num_bins_for_missed_spks_est': int
                The minimum number of bins used to compute the spike amplitude pdf
                for a unit, when computing the missed spikes estimate.

    Returns
    -------
    r : bunch
        A bunch whose keys are the computed spike metrics.

    Notes
    -----
    This function is called by `ephysqc.unit_metrics_ks2` which is called by
    `spikes.ks2_to_alf` during alf extraction of an ephys dataset in the ibl
    ephys extraction pipeline.

    Examples
    --------
    1) Compute quick metrics from a ks2 output directory:
        >>> from ibllib.ephys.ephysqc import phy_model_from_ks2_path
        >>> m = phy_model_from_ks2_path(path_to_ks2_out)
        >>> cluster_ids = m.spike_clusters
        >>> ts = m.spike_times
        >>> amps = m.amplitudes
        >>> depths = m.depths
        >>> r = bb.metrics.quick_unit_metrics(cluster_ids, ts, amps, depths)
    """
    metrics_list = [
        'cluster_id',
        'amp_max',
        'amp_min',
        'amp_median',
        'amp_std_dB',
        'contamination',
        'contamination_alt',
        'drift',
        'missed_spikes_est',
        'noise_cutoff',
        'presence_ratio',
        'presence_ratio_std',
        'slidingRP_viol',
        'spike_count'
    ]
    from brainbox.numerical import between_sorted
    if tbounds:
        ispi = between_sorted(spike_times, tbounds)
        spike_times = spike_times[ispi]
        spike_clusters = spike_clusters[ispi]
        spike_amps = spike_amps[ispi]
        spike_depths = spike_depths[ispi]

    if cluster_ids is None:
        cluster_ids = np.unique(spike_clusters)
    nclust = cluster_ids.size

    r = Bunch({k: np.full((nclust,), np.nan) for k in metrics_list})
    r['cluster_id'] = cluster_ids

    # vectorized computation of basic metrics such as presence ratio and firing rate
    tmin = spike_times[0]
    tmax = spike_times[-1]
    presence_ratio = bincount2D(spike_times, spike_clusters,
                                xbin=params['presence_window'],
                                ybin=cluster_ids, xlim=[tmin, tmax])[0]
    r.presence_ratio = np.sum(presence_ratio > 0, axis=1) / presence_ratio.shape[1]
    r.presence_ratio_std = np.std(presence_ratio, axis=1)
    r.spike_count = np.sum(presence_ratio, axis=1)
    r.firing_rate = r.spike_count / (tmax - tmin)

    # computing amplitude statistical indicators by aggregating over cluster id
    camp = pd.DataFrame(np.c_[spike_amps, 20 * np.log10(spike_amps), spike_clusters],
                        columns=['amps', 'log_amps', 'clusters'])
    camp = camp.groupby('clusters')
    ir, ib = ismember(r.cluster_id, camp.clusters.unique())
    r.amp_min[ir] = np.array(camp['amps'].min())
    r.amp_max[ir] = np.array(camp['amps'].max())
    # this is the geometric median
    r.amp_median[ir] = np.array(10 ** (camp['log_amps'].median() / 20))
    r.amp_std_dB[ir] = np.array(camp['log_amps'].std())

    # loop over each cluster to compute the rest of the metrics
    for ic in np.arange(nclust):
        # slice the spike_times array
        ispikes = spike_clusters == cluster_ids[ic]
        if np.all(~ispikes):  # if this cluster has no spikes, continue
            continue
        ts = spike_times[ispikes]
        amps = spike_amps[ispikes]
        depths = spike_depths[ispikes]
        # compute metrics
        r.contamination_alt[ic] = contamination_alt(ts, rp=params['refractory_period'])
        r.contamination[ic], _ = contamination(
            ts, tmin, tmax, rp=params['refractory_period'], min_isi=params['min_isi'])
        r.slidingRP_viol[ic] = slidingRP_viol(
            ts, bin_size=params['bin_size'], thresh=params['RPslide_thresh'],
            acceptThresh=params['acceptable_contamination'])
        r.noise_cutoff[ic] = noise_cutoff(
            amps, quartile_length=params['nc_quartile_length'],
            n_bins=params['nc_bins'], n_low_bins=params['nc_n_low_bins'])
        r.missed_spikes_est[ic], _, _ = missed_spikes_est(
            amps, spks_per_bin=params['spks_per_bin_for_missed_spks_est'],
            sigma=params['std_smoothing_kernel_for_missed_spks_est'],
            min_num_bins=params['min_num_bins_for_missed_spks_est'])
        # wonder if there is a need to low-cut this
        r.drift[ic] = np.sum(np.abs(np.diff(depths))) / (tmax - tmin) * 3600

    r.label = compute_labels(r)
    return r
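# Hedged end-to-end sketch (not in the source): feed the synthetic generator above
# into the metrics computation; with flat depths the drift should come out near zero.
st, sa, sc = multiple_spike_trains(firing_rates=np.array([10, 30]), rec_len_secs=200)
sd = np.zeros_like(st)  # flat depths, used here only for illustration
r = quick_unit_metrics(sc, st, sa, sd)
print(r['firing_rate'], r['drift'])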
def _check_ismember(a, b, lia_, locb_):
    # nested helper: meant to live inside a unittest.TestCase method, which is
    # where `self` comes from
    lia, locb = bnum.ismember(a, b)
    self.assertTrue(np.all(a[lia] == b[locb]))
    self.assertTrue(np.all(lia_ == lia))
    self.assertTrue(np.all(locb_ == locb))
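# For context, a minimal pure-numpy sketch of the `ismember(a, b)` semantics the
# helpers above rely on: `lia` flags the elements of `a` found in `b`, and `locb`
# gives the index in `b` of each matched element (assumes `b` has unique values).
import numpy as np

def ismember_sketch(a, b):
    lia = np.isin(a, b)
    sorter = np.argsort(b)
    locb = sorter[np.searchsorted(b, a[lia], sorter=sorter)]
    return lia, locb

lia, locb = ismember_sketch(np.array([3, 1, 4, 1, 5]), np.array([1, 2, 3]))
# lia -> [True, True, False, True, False]; locb -> [2, 0, 0]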