def __init__(self, mapping):
    """Validate ``mapping`` and precompute the shared pipeline parameters.

    The derived values (geometry, channel neighbors, channel groups and
    spike size) are used throughout the pipeline, so they are computed
    once here to avoid redundant computations.
    """
    validated = validate(mapping)
    super(Config, self).__init__(validated)

    self._logger = logging.getLogger(__name__)

    # GEOMETRY PARAMETERS
    geometry_file = path.join(self.data.root_folder, self.data.geometry)
    parsed_geometry = geom.parse(geometry_file, self.recordings.n_channels)
    self._set_param('geom', parsed_geometry)

    self._set_param(
        'neigh_channels',
        geom.find_channel_neighbors(self.geom,
                                    self.recordings.spatial_radius))

    self._set_param(
        'channel_groups',
        geom.make_channel_groups(self.recordings.n_channels,
                                 self.neigh_channels, self.geom))

    # spike size derived from the spike duration (ms) and sampling rate
    raw_spike_size = (self.recordings.spike_size_ms *
                      self.recordings.sampling_rate / (2 * 1000))
    self._set_param('spike_size', int(np.round(raw_spike_size)))
def __init__(self, path_to_recordings, path_to_geom=None, spike_size=None,
             neighbor_radius=None, dtype=None, n_channels=None,
             data_format=None, mmap=True, waveform_dtype='float32'):
    """Open the recordings and, optionally, load the probe geometry.

    Geometry attributes (``geom``, ``neighbor_radius``, ``neigh_matrix``)
    are only set when ``path_to_geom`` is given. Passing
    ``waveform_dtype='default'`` reuses ``dtype`` for waveforms.
    """
    self.data = RecordingsReader(path_to_recordings, dtype, n_channels,
                                 data_format, mmap, output_shape='long')

    has_geometry = path_to_geom is not None
    if has_geometry:
        self.geom = geom.parse(path_to_geom, n_channels)
        self.neighbor_radius = neighbor_radius
        self.neigh_matrix = geom.find_channel_neighbors(self.geom,
                                                        neighbor_radius)

    # channel count comes from the reader, not from the argument
    self.n_channels = self.data.channels
    self.spike_size = spike_size

    self.waveform_dtype = (dtype if waveform_dtype == 'default'
                           else waveform_dtype)

    self.logger = logging.getLogger(__name__)
def __init__(self, path_to_recordings, path_to_geom=None, spike_size=None,
             neighbor_radius=None, dtype=None, n_channels=None,
             data_order=None, loader='memmap', waveform_dtype='float32'):
    """Create the recordings reader and, optionally, load the geometry.

    Geometry attributes (``geom``, ``neighbor_radius``, ``neigh_matrix``)
    are only set when ``path_to_geom`` is given. Passing
    ``waveform_dtype='default'`` reuses ``dtype`` for waveforms.
    """
    self.reader = RecordingsReader(path_to_recordings, dtype, n_channels,
                                   data_order, loader)

    has_geometry = path_to_geom is not None
    if has_geometry:
        self.geom = geom.parse(path_to_geom, n_channels)
        self.neighbor_radius = neighbor_radius
        self.neigh_matrix = geom.find_channel_neighbors(self.geom,
                                                        neighbor_radius)

    # channel count comes from the reader, not from the argument
    self.n_channels = self.reader.channels
    self.spike_size = spike_size

    self.waveform_dtype = (dtype if waveform_dtype == 'default'
                           else waveform_dtype)

    self.logger = logging.getLogger(__name__)
def __init__(self, mapping, output_directory=None):
    """Build the configuration and precompute shared pipeline parameters.

    Parameters
    ----------
    mapping
        Configuration mapping; it is passed through ``validate`` and its
        ``resources.processes`` entry is replaced by the CPU count when
        set to ``'max'``.
    output_directory: str, optional
        Output directory. Absolute paths are used as given, relative
        paths are resolved against ``data.root_folder``. When None, no
        output directory is stored.

    Raises
    ------
    ValueError
        If the number of channels in the geometry file differs from
        ``recordings.n_channels``.
    """
    self._logger = logging.getLogger(__name__)

    # FIXME: not raising errors due to schema validation for now
    mapping = validate(mapping, silent=True)

    _processes = mapping['resources']['processes']
    mapping['resources']['processes'] = (
        multiprocess.cpu_count() if _processes == 'max' else _processes)

    self._frozenjson = FrozenJSON(mapping)

    if output_directory is not None:
        if path.isabs(output_directory):
            self._path_to_output_directory = output_directory
        else:
            resolved = Path(self.data.root_folder, output_directory)
            self._path_to_output_directory = str(resolved)
    else:
        self._path_to_output_directory = None

    # init the rest of the parameters, these parameters are used
    # throughout the pipeline so we compute them once to avoid redundant
    # computations

    # GEOMETRY PARAMETERS
    path_to_geom = path.join(self.data.root_folder, self.data.geometry)
    self._set_param('geom',
                    geom.parse(path_to_geom, self.recordings.n_channels))

    # check dimensions of the geometry file
    n_channels_geom, _ = self.geom.shape

    if self.recordings.n_channels != n_channels_geom:
        raise ValueError('Channels in the geometry file ({}) do not '
                         'match the value in the configuration file ({})'
                         .format(n_channels_geom,
                                 self.recordings.n_channels))

    neigh_channels = geom.find_channel_neighbors(
        self.geom, self.recordings.spatial_radius)
    self._set_param('neigh_channels', neigh_channels)

    channel_groups = geom.make_channel_groups(self.recordings.n_channels,
                                              self.neigh_channels,
                                              self.geom)
    self._set_param('channel_groups', channel_groups)

    # spike size derived from the spike duration (ms) and sampling rate
    self._set_param(
        'spike_size',
        int(
            np.round(self.recordings.spike_size_ms *
                     self.recordings.sampling_rate / (2 * 1000))))

    channel_index = geom.make_channel_index(self.neigh_channels,
                                            self.geom, steps=2)
    self._set_param('channel_index', channel_index)
def test_can_use_threshold_detector(data, data_info, path_to_geometry):
    """Smoke test: the threshold detector runs without raising."""
    rec_params = data_info['recordings']

    probe = parse(path_to_geometry, rec_params['n_channels'])
    neigh = find_channel_neighbors(probe, radius=70)

    # FIXME: using the same formula from yass/config/config.py, might be
    # better to avoid having this hardcoded
    raw_size = (rec_params['spike_size_ms'] *
                rec_params['sampling_rate'] / (2 * 1000))
    spike_size = int(np.round(raw_size))

    detect._threshold(data, neigh, spike_size, 5)
def test_can_compute_whiten_matrix(data, data_info, path_to_geometry):
    """Smoke test: the whitening matrix is computed without raising."""
    rec_params = data_info['recordings']

    probe = parse(path_to_geometry, rec_params['n_channels'])
    neigh = find_channel_neighbors(probe, radius=70)
    ch_index = make_channel_index(neigh, probe)

    # FIXME: using the same formula from yass/config/config.py, might be
    # better to avoid having this hardcoded
    raw_size = (rec_params['spike_size_ms'] *
                rec_params['sampling_rate'] / (2 * 1000))
    spike_size = int(np.round(raw_size))

    whiten._matrix(data, ch_index, spike_size)
def __init__(self, rec_file, geom_file, sample_rate, n_batches,
             batch_time_samples, n_chan, radius, scale=1e2,
             filter_std=True, whiten=True):
    """Sets up the object for reading from a binary file.

    Parameters
    ----------
    rec_file: str
        Path to binary file that contains the raw recording file.
    geom_file: str
        Path to text file containing the geometry file. The file should
        contain n_chan lines and each line should contain two numbers
        that are separated by ' '.
    sample_rate: int
        Recording sample rate in Hz
    n_batches: int
        Processes the recording in n_batches number of consecutive
        segments that start from the beginning.
    batch_time_samples: int
        Number of time samples per each batch to be used.
    n_chan: int
        Number of channels; passed to the geometry parser.
    radius
        Radius passed to ``find_channel_neighbors`` to build the channel
        neighbors.
    scale: float
        In case filter and whitening is not needed and the binary data
        is scaled up.
    filter_std: bool
        The iterator both filters and standardizes the recording
        (dividing by standard deviation).
    whiten: bool
        Spatially whiten the recording.
    """
    self.s_rate = sample_rate
    self.batch_time_samples = batch_time_samples
    self.n_batches = n_batches
    self.n_chan = n_chan
    self.radius = radius
    self.geometry = parse(geom_file, n_chan)
    self.neighbs = find_channel_neighbors(self.geometry, self.radius)
    self.filter_std = filter_std
    self.whiten = whiten
    self.scale = scale
    # The recording is raw binary data: open it in binary mode so no text
    # decoding or newline translation is applied to its bytes.
    self.file = open(rec_file, 'rb')
def __init__(self, mapping):
    """Validate ``mapping`` and precompute the shared pipeline parameters.

    Raises
    ------
    ValueError
        If the number of channels in the geometry file differs from
        ``recordings.n_channels``.
    """
    mapping = validate(mapping)

    super(Config, self).__init__(mapping)

    self._logger = logging.getLogger(__name__)

    # init the rest of the parameters, these parameters are used
    # throughout the pipeline so we compute them once to avoid redundant
    # computations

    # GEOMETRY PARAMETERS
    path_to_geom = path.join(self.data.root_folder, self.data.geometry)
    self._set_param('geom',
                    geom.parse(path_to_geom, self.recordings.n_channels))

    # check dimensions of the geometry file
    n_channels_geom, _ = self.geom.shape

    if self.recordings.n_channels != n_channels_geom:
        raise ValueError('Channels in the geometry file ({}) do not '
                         'match the value in the configuration file ({})'
                         .format(n_channels_geom,
                                 self.recordings.n_channels))

    neigh_channels = geom.find_channel_neighbors(
        self.geom, self.recordings.spatial_radius)
    self._set_param('neigh_channels', neigh_channels)

    channel_groups = geom.make_channel_groups(
        self.recordings.n_channels, self.neigh_channels, self.geom)
    self._set_param('channel_groups', channel_groups)

    # spike size derived from the spike duration (ms) and sampling rate
    self._set_param(
        'spike_size',
        int(
            np.round(self.recordings.spike_size_ms *
                     self.recordings.sampling_rate / (2 * 1000))))

    channel_index = geom.make_channel_index(self.neigh_channels,
                                            self.geom, steps=2)
    self._set_param('channel_index', channel_index)
def test_can_compute_n_steps_neighbors(data_info, path_to_geometry):
    """Smoke test: two-step neighbors are computed without raising."""
    probe = parse(path_to_geometry, data_info['n_channels'])
    direct_neighbors = find_channel_neighbors(probe, radius=70)

    n_steps_neigh_channels(direct_neighbors, steps=2)
def test_can_compute_channel_neighbors(data_info, path_to_geometry):
    """Smoke test: channel neighbors are computed without raising."""
    probe = parse(path_to_geometry, data_info['n_channels'])

    find_channel_neighbors(probe, radius=70)
def test_can_parse(data_info, path_to_geometry):
    """Smoke test: the geometry file is parsed without raising."""
    expected_channels = data_info['n_channels']

    parse(path_to_geometry, expected_channels)
def test_raises_error_if_txt_with_wrong_channels(path_to_txt_geometry):
    """Parsing a txt geometry with n_channels=500 must raise ValueError."""
    wrong_channel_count = 500

    with pytest.raises(ValueError):
        geometry.parse(path_to_txt_geometry, n_channels=wrong_channel_count)
def test_can_compute_channel_neighbors(path_to_geometry):
    """Smoke test: channel neighbors are computed without raising."""
    # n_channels is read from the enclosing module scope
    probe = parse(path_to_geometry, n_channels)

    find_channel_neighbors(probe, radius=70)
def export(directory, output_dir):
    """
    Generates phy input files, 'yass sort' (with the `--complete` option)
    must be run first to generate all the necessary files

    Parameters
    ----------
    directory: str
        Folder with the 'yass sort' output; 'config.yaml' and the .npy
        files used below are read from it.
    output_dir: str or None
        Folder where the phy files are written. When None, a 'phy/'
        folder inside ``directory`` is used.

    Raises
    ------
    click.Abort
        If ``directory`` does not exist.
    """
    logging.basicConfig(level=logging.INFO)

    logger = logging.getLogger(__name__)

    TMP_FOLDER = directory

    # verify that the tmp/ folder exists, otherwise abort. This must run
    # before reading the configuration file stored inside that folder,
    # otherwise the failed read hides the message below.
    if not os.path.exists(TMP_FOLDER):
        click.echo("{} directory does not exist, this means you "
                   "haven't run 'yass sort', run it before running "
                   "'yass export' again...".format(TMP_FOLDER))
        raise click.Abort()

    PATH_TO_CONFIG = path.join(TMP_FOLDER, 'config.yaml')
    CONFIG = load_yaml(PATH_TO_CONFIG)
    ROOT_FOLDER = CONFIG['data']['root_folder']
    N_CHANNELS = CONFIG['recordings']['n_channels']

    if output_dir is None:
        PHY_FOLDER = path.join(TMP_FOLDER, 'phy/')
    else:
        PHY_FOLDER = output_dir

    if not os.path.exists(PHY_FOLDER):
        logger.info('Creating directory: {}'.format(PHY_FOLDER))
        os.makedirs(PHY_FOLDER)

    # TODO: convert data to wide format

    # generate params.py
    params = generate.params(PATH_TO_CONFIG)
    path_to_params = path.join(PHY_FOLDER, 'params.py')

    with open(path_to_params, 'w') as f:
        f.write(params)

    logger.info('Saved {}...'.format(path_to_params))

    # channel_positions.npy
    logger.info('Generating channel_positions.npy')
    path_to_geom = path.join(ROOT_FOLDER, CONFIG['data']['geometry'])
    geom = geometry.parse(path_to_geom, N_CHANNELS)
    path_to_channel_positions = path.join(PHY_FOLDER,
                                          'channel_positions.npy')
    np.save(path_to_channel_positions, geom)
    logger.info('Saved {}...'.format(path_to_channel_positions))

    # channel_map.npy
    channel_map = generate.channel_map(N_CHANNELS)
    path_to_channel_map = path.join(PHY_FOLDER, 'channel_map.npy')
    np.save(path_to_channel_map, channel_map)
    logger.info('Saved {}...'.format(path_to_channel_map))

    # load spike train
    path_to_spike_train = path.join(TMP_FOLDER, 'spike_train.npy')
    logger.info('Loading spike train from {}...'.format(path_to_spike_train))
    spike_train = np.load(path_to_spike_train)
    N_SPIKES, _ = spike_train.shape
    logger.info('Spike train contains {:,} spikes'.format(N_SPIKES))

    # load templates
    logger.info('Loading previously saved templates...')
    path_to_templates = path.join(TMP_FOLDER, 'templates.npy')
    templates = np.load(path_to_templates)
    _, _, N_TEMPLATES = templates.shape

    # pc_features_ind.npy
    path_to_pc_features_ind = path.join(PHY_FOLDER, 'pc_feature_ind.npy')
    ch_neighbors = geometry.find_channel_neighbors
    neigh_channels = ch_neighbors(geom, CONFIG['recordings']['spatial_radius'])
    pc_feature_ind = generate.pc_feature_ind(N_SPIKES, N_TEMPLATES,
                                             N_CHANNELS, geom,
                                             neigh_channels, spike_train,
                                             templates)
    np.save(path_to_pc_features_ind, pc_feature_ind)

    # similar_templates.npy
    path_to_similar_templates = path.join(PHY_FOLDER,
                                          'similar_templates.npy')
    # templates are read from disk again here, as the original flow does
    templates = np.load(path_to_templates)
    similar_templates = generate.similar_templates(templates)
    np.save(path_to_similar_templates, similar_templates)
    logger.info('Saved {}...'.format(path_to_similar_templates))

    # spike_templates.npy and spike_times.npy
    path_to_spike_templates = path.join(PHY_FOLDER, 'spike_templates.npy')
    np.save(path_to_spike_templates, spike_train[:, 1])
    logger.info('Saved {}...'.format(path_to_spike_templates))

    path_to_spike_times = path.join(PHY_FOLDER, 'spike_times.npy')
    np.save(path_to_spike_times, spike_train[:, 0])
    logger.info('Saved {}...'.format(path_to_spike_times))

    # template_feature_ind.npy
    path_to_template_feature_ind = path.join(PHY_FOLDER,
                                             'template_feature_ind.npy')
    template_feature_ind = generate.template_feature_ind(
        N_TEMPLATES, similar_templates)
    np.save(path_to_template_feature_ind, template_feature_ind)
    logger.info('Saved {}...'.format(path_to_template_feature_ind))

    # template_features.npy
    templates_score = np.load(path.join(TMP_FOLDER, 'templates_score.npy'))
    templates_main_channel = np.load(
        path.join(TMP_FOLDER, 'templates_main_channel.npy'))
    waveforms_score = np.load(path.join(TMP_FOLDER, 'waveforms_score.npy'))

    path_to_template_features = path.join(PHY_FOLDER,
                                          'template_features.npy')
    template_features = generate.template_features(
        N_SPIKES, N_CHANNELS, N_TEMPLATES, spike_train,
        templates_main_channel, neigh_channels, geom, templates_score,
        template_feature_ind, waveforms_score)
    np.save(path_to_template_features, template_features)
    logger.info('Saved {}...'.format(path_to_template_features))

    # templates.npy
    path_to_phy_templates = path.join(PHY_FOLDER, 'templates.npy')
    np.save(path_to_phy_templates, np.transpose(templates, [2, 1, 0]))
    logger.info(
        'Saved phy-compatible templates in {}'.format(path_to_phy_templates))

    # templates_ind.npy
    templates_ind = generate.templates_ind(N_TEMPLATES, N_CHANNELS)
    path_to_templates_ind = path.join(PHY_FOLDER, 'templates_ind.npy')
    np.save(path_to_templates_ind, templates_ind)
    logger.info('Saved {}...'.format(path_to_templates_ind))

    # whitening_mat.npy and whitening_mat_inv.npy
    logger.info('Generating whitening_mat.npy and whitening_mat_inv.npy...')
    path_to_whitening = path.join(TMP_FOLDER, 'whitening.npy')
    path_to_whitening_mat = path.join(PHY_FOLDER, 'whitening_mat.npy')
    # copy2 needs both source and destination
    shutil.copy2(path_to_whitening, path_to_whitening_mat)
    logger.info('Saving copy of whitening: {} in {}'.format(
        path_to_whitening, path_to_whitening_mat))

    path_to_whitening_mat_inv = path.join(PHY_FOLDER,
                                          'whitening_mat_inv.npy')
    whitening = np.load(path_to_whitening)
    np.save(path_to_whitening_mat_inv, np.linalg.inv(whitening))
    logger.info('Saving inverse of whitening matrix in {}...'.format(
        path_to_whitening_mat_inv))

    logger.info('Done.')
def test_can_load_npy(path_to_npy_geometry):
    """A .npy geometry parsed with 10 channels has shape (10, 2)."""
    loaded = geometry.parse(path_to_npy_geometry, n_channels=10)
    expected_shape = (10, 2)

    assert loaded.shape == expected_shape
def test_can_load_npy(path_to_npy):
    """A .npy geometry parsed with 374 channels has shape (374, 2)."""
    loaded = geometry.parse(path_to_npy, n_channels=374)
    expected_shape = (374, 2)

    assert loaded.shape == expected_shape
def test_can_compute_whiten_matrix(data, path_to_geometry):
    """Smoke test: the whitening matrix is computed without raising."""
    # n_channels and spike_size are read from the enclosing module scope
    probe = parse(path_to_geometry, n_channels)
    neigh = find_channel_neighbors(probe, radius=70)

    whiten.matrix(data, neigh, spike_size)
def test_can_whiten_data(data, path_to_geometry):
    """Smoke test: data is whitened with a freshly computed matrix."""
    # n_channels and spike_size are read from the enclosing module scope
    probe = parse(path_to_geometry, n_channels)
    neigh = find_channel_neighbors(probe, radius=70)

    whitening_filter = whitening_matrix(data, neigh, spike_size)
    whitening(data, whitening_filter)
def test_can_use_threshold_detector(data, path_to_geometry):
    """Smoke test: threshold detection runs without raising."""
    # n_channels and spike_size are read from the enclosing module scope
    probe = parse(path_to_geometry, n_channels)
    neigh = find_channel_neighbors(probe, radius=70)

    threshold_detection(data, neigh, spike_size, 5)
def test_can_compute_n_steps_neighbors(path_to_geometry):
    """Smoke test: two-step neighbors are computed without raising."""
    # n_channels is read from the enclosing module scope
    probe = parse(path_to_geometry, n_channels)
    direct_neighbors = find_channel_neighbors(probe, radius=70)

    n_steps_neigh_channels(direct_neighbors, steps=2)
def test_can_use_threshold_detector(data, data_info, path_to_geometry):
    """Smoke test: the threshold detector runs without raising."""
    probe = parse(path_to_geometry, data_info['n_channels'])
    neigh = find_channel_neighbors(probe, radius=70)

    spike_size = data_info['spike_size']
    detect._threshold(data, neigh, spike_size, 5)
def test_can_compute_whiten_matrix(data, data_info, path_to_geometry):
    """Smoke test: the whitening matrix is computed without raising."""
    probe = parse(path_to_geometry, data_info['n_channels'])
    neigh = find_channel_neighbors(probe, radius=70)
    ch_index = make_channel_index(neigh, probe)

    spike_size = data_info['spike_size']
    whiten._matrix(data, ch_index, spike_size)
def test_can_load_txt(path_to_txt_geometry):
    """A .txt geometry parsed with 7 channels has shape (7, 2)."""
    loaded = geometry.parse(path_to_txt_geometry, n_channels=7)
    expected_shape = (7, 2)

    assert loaded.shape == expected_shape
def __init__(self, mapping, output_directory=None):
    """Build the configuration and precompute shared pipeline parameters.

    Parameters
    ----------
    mapping
        Configuration mapping; it is passed through ``validate``.
    output_directory: str, optional
        Output directory. Absolute paths are used as given, relative
        paths are resolved against ``data.root_folder``. When None, no
        output directory is stored.

    Raises
    ------
    ValueError
        If the number of channels in the geometry file differs from
        ``recordings.n_channels``, or if the input spike sizes read from
        the saved detector and denoiser models differ.
    """
    self._logger = logging.getLogger(__name__)

    # FIXME: not raising errors due to schema validation for now
    mapping = validate(mapping, silent=True)

    self._frozenjson = FrozenJSON(mapping)

    if output_directory is not None:
        if path.isabs(output_directory):
            self._path_to_output_directory = output_directory
        else:
            resolved = Path(self.data.root_folder, output_directory)
            self._path_to_output_directory = str(resolved)
    else:
        self._path_to_output_directory = None

    # init the rest of the parameters, these parameters are used
    # throughout the pipeline so we compute them once to avoid redundant
    # computations

    # GEOMETRY PARAMETERS
    path_to_geom = path.join(self.data.root_folder, self.data.geometry)
    self._set_param('geom',
                    geom.parse(path_to_geom, self.recordings.n_channels))

    # check dimensions of the geometry file
    n_channels_geom, _ = self.geom.shape

    if self.recordings.n_channels != n_channels_geom:
        raise ValueError('Channels in the geometry file ({}) do not '
                         'match the value in the configuration file ({})'
                         .format(n_channels_geom,
                                 self.recordings.n_channels))

    neigh_channels = geom.find_channel_neighbors(
        self.geom, self.recordings.spatial_radius)
    self._set_param('neigh_channels', neigh_channels)

    # spike size long (to cover full axonal propagation)
    spike_size = int(
        np.round(self.recordings.spike_size_ms *
                 self.recordings.sampling_rate / 1000))
    # bump even sizes to the next odd number
    if spike_size % 2 == 0:
        spike_size += 1
    self._set_param('spike_size', spike_size)

    # spike size center
    if self.recordings.center_spike_size_ms is not None:
        center_spike_size = int(
            np.round(self.recordings.center_spike_size_ms *
                     self.recordings.sampling_rate / 1000))
        if center_spike_size % 2 == 0:
            center_spike_size += 1
    else:
        # spike_size is already a plain int, no copy/conversion needed
        center_spike_size = spike_size
    self._set_param('center_spike_size', center_spike_size)

    # channel index for nn
    channel_index = geom.make_channel_index(self.neigh_channels,
                                            self.geom, steps=1)
    self._set_param('channel_index', channel_index)

    # spike size to nn
    if self.neuralnetwork.apply_nn:
        if self.neuralnetwork.training.spike_size_ms is None:
            # no training spike size configured: read the input size
            # from the weights stored in the saved models
            detect_saved_file = torch.load(
                self.neuralnetwork.detect.filename,
                map_location=lambda storage, loc: storage)
            spike_size_nn_detector = detect_saved_file[
                'temporal_filter1.0.weight'].shape[2]

            denoised_saved_file = torch.load(
                self.neuralnetwork.denoise.filename,
                map_location=lambda storage, loc: storage)
            spike_size_nn_denoiser = denoised_saved_file[
                'out.weight'].shape[0]

            del detect_saved_file
            del denoised_saved_file
            torch.cuda.empty_cache()

            if spike_size_nn_detector != spike_size_nn_denoiser:
                raise ValueError(
                    'input spike sizes of nn detector and denoiser do not match. change models'
                )
            spike_size_nn = spike_size_nn_detector
        else:
            spike_size_nn = int(
                np.round(self.neuralnetwork.training.spike_size_ms *
                         self.recordings.sampling_rate / 1000))
            if spike_size_nn % 2 == 0:
                spike_size_nn += 1
        self._set_param('spike_size_nn', spike_size_nn)
    else:
        self._set_param('spike_size_nn', center_spike_size)

    # torch devices
    devices = []
    if torch.cuda.is_available():
        n_processors = np.min(
            (torch.cuda.device_count(), self.resources.n_gpu_processors))
        for j in range(n_processors):
            devices.append(torch.device("cuda:{}".format(j)))
    # fall back to the CPU when no CUDA device was selected
    if not devices:
        devices = [torch.device("cpu")]
    self._set_param('torch_devices', devices)

    # compute the length of recording
    filename_dat = os.path.join(self.data.root_folder,
                                self.data.recordings)
    filesize = os.path.getsize(filename_dat)
    dtype = np.dtype(self.recordings.dtype)
    rec_len = int(filesize / dtype.itemsize / self.recordings.n_channels)
    self._set_param('rec_len', rec_len)

    # chunk used for the final deconvolution; when not configured, the
    # end is rec_len divided by the sampling rate, rounded up
    if self.recordings.final_deconv_chunk is None:
        start = 0
        end = int(np.ceil(self.rec_len / self.recordings.sampling_rate))
    else:
        start = int(np.floor(self.recordings.final_deconv_chunk[0]))
        end = int(np.ceil(self.recordings.final_deconv_chunk[1]))
    self._set_param('final_deconv_chunk', [start, end])

    # chunk used for clustering, same defaulting rule as above
    if self.recordings.clustering_chunk is None:
        start = 0
        end = int(np.ceil(self.rec_len / self.recordings.sampling_rate))
    else:
        start = int(np.floor(self.recordings.clustering_chunk[0]))
        end = int(np.ceil(self.recordings.clustering_chunk[1]))
    self._set_param('clustering_chunk', [start, end])
def test_raise_error_if_unsupported_ext():
    """Parsing a geometry path ending in '.mat' must raise ValueError."""
    unsupported_path = 'unsupported.mat'

    with pytest.raises(ValueError):
        geometry.parse(unsupported_path, n_channels=7)
# path to 49 channels data root = os.path.expanduser('~/data/yass') path_to_data = os.path.join(root, 'ej49_data1_set1.bin') path_to_geom = os.path.join(root, 'ej49_geometry1.txt') observations = 6000000 n_channels = 49 sampling_freq = 20000 rec = np.fromfile(path_to_data, dtype='int16').reshape(observations, n_channels) rec.shape # TODO: check number of channels in neuropixel data geom = geometry.parse(path_to_geom, n_channels) neighbors = geometry.find_channel_neighbors(geom, radius=70) # get some observations from channel 0 raw_data = rec[50000:51000, 0] filtered = preprocess.butterworth(raw_data, low_freq=300, high_factor=0.1, order=3, sampling_freq=sampling_freq) standardized = preprocess.standarize(filtered, sampling_freq=sampling_freq) fix, (ax1, ax2, ax3) = plt.subplots(nrows=3) ax1.plot(raw_data)
def test_can_parse(path_to_geometry):
    """Smoke test: the geometry file is parsed without raising."""
    # n_channels is read from the enclosing module scope
    expected_channels = n_channels

    parse(path_to_geometry, expected_channels)