def _create_waveform_loader(self):
    """Build a waveform loader over the traces, if there are any.

    Returns a `WaveformLoader` configured from the instance's traces,
    spike samples, filter order and sample rate, using the number of time
    samples of the templates as the waveform length. Returns `None` when
    `self.traces` is `None`.
    """
    # Waveforms span as many time samples as the templates.
    n_samples = self.n_samples_templates
    if self.traces is None:
        return None
    return WaveformLoader(
        traces=self.traces,
        spike_samples=self.spike_samples,
        n_samples_waveforms=n_samples,
        filter_order=self.filter_order,
        sample_rate=self.sample_rate,
    )
def _init_data(self):
    """Load the dataset arrays from disk and populate the instance.

    Reads the raw traces (when `self.dat_path` points to an existing
    file) and the arrays registered in `filenames` (amplitudes, spike
    clusters/templates/samples, templates, whitening matrix, similar
    templates, channel mapping and positions, optional features and
    template features, optional cluster groups), checks that their
    shapes agree with each other, and stores the results as attributes
    on `self`.

    Side effects on disk:

    * the spike clusters file is created as a copy of the spike
      templates file when it does not exist yet;
    * the unwhitened templates are computed from the whitening matrix
      and written to `templates_unw.npy` when the unwhitened templates
      file does not exist yet.

    Raises:
        AssertionError: if the loaded arrays have inconsistent shapes,
            if the channel mapping refers to a column beyond
            `self.n_channels_dat`, or if the spike times are not sorted.
    """
    # Raw traces. `dat_path` may be None (the warning below is only
    # emitted for a non-None path), and `os.path.exists(None)` raises
    # TypeError, so the None case has to be ruled out first.
    if self.dat_path is not None and op.exists(self.dat_path):
        logger.debug("Loading traces at `%s`.", self.dat_path)
        traces = _dat_to_traces(
            self.dat_path,
            n_channels=self.n_channels_dat,
            dtype=self.dtype or np.int16,
            offset=self.offset,
        )
        n_samples_t, n_channels_traces = traces.shape
        assert n_channels_traces == self.n_channels_dat
    else:
        if self.dat_path is not None:
            logger.warning("Error while loading data: File %s not found.",
                           self.dat_path)
        traces = None
        n_samples_t = 0

    logger.debug("Loading amplitudes.")
    amplitudes = read_array('amplitudes').squeeze()
    # The amplitudes define the number of spikes; every other per-spike
    # array is checked against it.
    n_spikes, = amplitudes.shape
    self.n_spikes = n_spikes

    # Create spike_clusters if the file doesn't exist.
    if not op.exists(filenames['spike_clusters']):
        shutil.copy(filenames['spike_templates'],
                    filenames['spike_clusters'])
    logger.debug("Loading %d spike clusters.", self.n_spikes)
    spike_clusters = read_array('spike_clusters').squeeze()
    spike_clusters = spike_clusters.astype(np.int32)
    assert spike_clusters.shape == (n_spikes, )
    self.spike_clusters = spike_clusters

    logger.debug("Loading spike templates.")
    spike_templates = read_array('spike_templates').squeeze()
    spike_templates = spike_templates.astype(np.int32)
    assert spike_templates.shape == (n_spikes, )
    self.spike_templates = spike_templates

    logger.debug("Loading spike samples.")
    spike_samples = read_array('spike_samples').squeeze()
    assert spike_samples.shape == (n_spikes, )

    logger.debug("Loading templates.")
    templates = read_array('templates')
    templates[np.isnan(templates)] = 0

    # Unwhiten the templates.
    logger.debug("Loading the whitening matrix.")
    self.whitening_matrix = read_array('whitening_matrix')

    if op.exists(filenames['templates_unw']):
        logger.debug("Loading unwhitened templates.")
        templates_unw = read_array('templates_unw')
        templates_unw[np.isnan(templates_unw)] = 0
    else:
        logger.debug("Couldn't find unwhitened templates, computing them.")
        logger.debug("Inversing the whitening matrix %s.",
                     self.whitening_matrix.shape)
        wmi = np.linalg.inv(self.whitening_matrix)
        logger.debug("Unwhitening the templates %s.", templates.shape)
        templates_unw = np.dot(np.ascontiguousarray(templates),
                               np.ascontiguousarray(wmi))
        # Save the unwhitened templates.
        write_array('templates_unw.npy', templates_unw)

    n_templates, n_samples_templates, n_channels = templates.shape
    self.n_templates = n_templates

    logger.debug("Loading similar templates.")
    self.similar_templates = read_array('similar_templates')
    assert self.similar_templates.shape == (self.n_templates,
                                            self.n_templates)

    logger.debug("Loading channel mapping.")
    channel_mapping = read_array('channel_mapping').squeeze()
    channel_mapping = channel_mapping.astype(np.int32)
    assert channel_mapping.shape == (n_channels, )
    # Ensure that the mappings maps to valid columns in the dat file.
    assert np.all(channel_mapping <= self.n_channels_dat - 1)

    logger.debug("Loading channel positions.")
    channel_positions = read_array('channel_positions')
    assert channel_positions.shape == (n_channels, 2)

    if op.exists(filenames['features']):
        logger.debug("Loading features.")
        # Memory-mapped read-only rather than loaded in memory.
        all_features = np.load(filenames['features'], mmap_mode='r')
        features_ind = read_array('features_ind').astype(np.int32)
        # Feature subset.
        if op.exists(filenames['features_spike_ids']):
            features_spike_ids = read_array('features_spike_ids') \
                .astype(np.int32)
            assert len(features_spike_ids) == len(all_features)
            self.features_spike_ids = features_spike_ids
            ns = len(features_spike_ids)
        else:
            ns = self.n_spikes
            self.features_spike_ids = None
        assert all_features.ndim == 3
        n_loc_chan = all_features.shape[2]
        self.n_features_per_channel = all_features.shape[1]
        assert all_features.shape == (
            ns,
            self.n_features_per_channel,
            n_loc_chan,
        )
        # Check sparse features arrays shapes.
        assert features_ind.shape == (self.n_templates, n_loc_chan)
    else:
        # NOTE(review): `self.features_spike_ids` is not assigned on this
        # path -- confirm that readers of that attribute cope with it.
        all_features = None
        features_ind = None
    self.all_features = all_features
    self.features_ind = features_ind

    if op.exists(filenames['template_features']):
        logger.debug("Loading template features.")
        template_features = np.load(filenames['template_features'],
                                    mmap_mode='r')
        template_features_ind = read_array('template_features_ind'). \
            astype(np.int32)
        template_features_ind = template_features_ind.copy()
        n_sim_tem = template_features.shape[1]
        assert template_features.shape == (n_spikes, n_sim_tem)
        assert template_features_ind.shape == (n_templates, n_sim_tem)
    else:
        template_features = None
        template_features_ind = None
    self.template_features_ind = template_features_ind
    self.template_features = template_features

    self.n_channels = n_channels

    # Take dead channels into account.
    if traces is not None:
        # Find the scaling factor for the traces, estimated on the first
        # 10000 samples only.
        scaling = 1. / self._data_lim(traces[:10000])
        traces = _concatenate_virtual_arrays(
            [traces],
            channel_mapping,
            scaling=scaling,
        )

    # Amplitudes
    self.all_amplitudes = amplitudes
    self.amplitudes_lim = np.max(self.all_amplitudes)

    # Templates
    self.templates = templates
    self.templates_unw = templates_unw
    assert self.templates.shape == self.templates_unw.shape
    self.n_samples_templates = n_samples_templates
    self.n_samples_waveforms = n_samples_templates
    self.template_lim = np.max(np.abs(self.templates))

    # Convert sample counts to seconds.
    self.duration = n_samples_t / float(self.sample_rate)
    self.spike_times = spike_samples / float(self.sample_rate)
    # The spike times must be sorted in increasing order.
    assert np.all(np.diff(self.spike_times) >= 0)

    self.cluster_ids = _unique(self.spike_clusters)
    self.channel_positions = channel_positions
    self.all_traces = traces

    # Only filter the data for the waveforms if the traces
    # are not already filtered.
    if not getattr(self, 'hp_filtered', False):
        logger.debug("HP filtering the data for waveforms")
        filter_order = 3
    else:
        filter_order = None

    n_closest_channels = getattr(self, 'max_n_unmasked_channels', 16)
    mask_threshold = getattr(self, 'waveform_mask_threshold', None)
    self.closest_channels = get_closest_channels(
        self.channel_positions,
        n_closest_channels,
    )
    self.template_masks = get_masks(self.templates, self.closest_channels)
    self.all_masks = MaskLoader(self.template_masks, self.spike_templates)

    # Fetch waveforms from traces.
    nsw = self.n_samples_waveforms
    if traces is not None:
        waveforms = WaveformLoader(
            traces=traces,
            masks=self.all_masks,
            spike_samples=spike_samples,
            n_samples_waveforms=nsw,
            filter_order=filter_order,
            sample_rate=self.sample_rate,
            mask_threshold=mask_threshold,
        )
    else:
        waveforms = None
    self.all_waveforms = waveforms

    # Read the cluster groups.
    logger.debug("Loading the cluster groups.")
    self.cluster_groups = {}
    if op.exists(filenames['cluster_groups']):
        with open(filenames['cluster_groups'], 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            # Skip the header.
            next(reader, None)
            for row in reader:
                # A blank line yields an empty row, which cannot be
                # unpacked into (cluster, group).
                if not row:
                    continue
                cluster, group = row
                self.cluster_groups[int(cluster)] = group
    # Clusters without an entry in the file get no group.
    for cluster_id in self.cluster_ids:
        self.cluster_groups.setdefault(cluster_id, None)