def __init__(self, stim_dir=None, spike_dir=None, waveform_transformer=None,
             waveform_loader=None):
    r"""Set up an object that links stimulus files to response files.

    stim_dir : directory holding files of name 'stim(\d+).wav', one wave
        file per trial, sampled at 200KHz.
    spike_dir : directory holding plaintext files of name 'spike(\d+)',
        one per trial, consisting of spike times separated by spaces in
        ms all on a single line, aligned to time zero the start of the
        corresponding wave file.
    waveform_transformer : object used to transform loaded waveforms.
        Defaults to Spectrogrammer().
    waveform_loader : function taking a filename and returning
        (waveform, fs). Defaults to self.load_waveform_from_wave_file;
        supply your own if you do not have wave files.
    """
    # Data locations
    self.stim_dir, self.spike_dir = stim_dir, spike_dir
    self.waveform_transformer = None
    self.error_check_filenames = True

    # File lists stay unset until filled automatically or by the user
    self.wave_file_list = self.spike_file_list = None

    # Fall back to the default transformer / loader when none is supplied
    self.waveform_transformer = (
        Spectrogrammer() if waveform_transformer is None
        else waveform_transformer)
    self.waveform_loader = (
        self.load_waveform_from_wave_file if waveform_loader is None
        else waveform_loader)
class STRF_experiment:
    r"""Class that holds links to stimulus and response files.

    Class attributes:
    stim_file_label, spike_file_label : filename prefixes used when
        globbing for stimulus ('<label>*.wav') and spike ('<label>*') files.
    stim_file_regex, spike_file_regex : patterns whose first group is the
        trial number; used to check that stimulus and spike files pair up.
    """
    stim_file_label = 'stim'
    spike_file_label = 'spike'
    stim_file_regex = r'stim(\d+)\.wav'
    spike_file_regex = r'spike(\d+)'

    def __init__(self, stim_dir=None, spike_dir=None,
                 waveform_transformer=None, waveform_loader=None):
        r"""Initialize object to hold stimulus and response data.

        stim_dir : directory holding files of name 'stim(\d+).wav', one
            wave file per trial, sampled at 200KHz.
        spike_dir : directory holding plaintext files of name
            'spike(\d+)', one per trial, consisting of spike times
            separated by spaces in ms all on a single line, aligned to
            time zero the start of the corresponding wave file.
        waveform_transformer : defaults to Spectrogrammer()
        waveform_loader : defaults to self.load_waveform_from_wave_file
            But you can set this to be something else if you do not have
            wave files. It needs to be a function taking a filename as
            argument and return (waveform, fs)
        """
        self.stim_dir = stim_dir
        self.spike_dir = spike_dir
        self.error_check_filenames = True

        # list of files, to be set later automatically or by user
        self.wave_file_list = None
        self.spike_file_list = None

        # Waveform loader and transformer objects
        if waveform_transformer is None:
            # Use a Spectrogrammer with reasonable default parameters
            waveform_transformer = Spectrogrammer()
        self.waveform_transformer = waveform_transformer

        if waveform_loader is None:
            waveform_loader = self.load_waveform_from_wave_file
        self.waveform_loader = waveform_loader

    def transform_all_stimuli(self, assert_sampling_rate=None, truncate=None):
        """Calculates spectrograms of each stimulus.

        First loads using self.waveform_loader. Then transforms using
        self.waveform_transformer. Finally stores the results in
        self.specgm_list, self.t_list, and self.freqs_list (one entry per
        stimulus file). Nothing is returned.

        If the time base is identical for all stimuli, self.t is set to
        that common value; likewise self.freqs for the frequencies.
        Otherwise these are left as None.

        assert_sampling_rate : if not None, assert that I got this
            sampling rate when loading the waveform
        truncate : if not None, drop all data after this time for all
            stimuli (same units as the `t` returned by the transformer;
            presumably seconds)

        Each transformed stimulus is indexed as (n_freqs, n_timepoints).
        """
        # Get list of files to transform
        self._set_list_of_files()

        # where data goes
        self.specgm_list = []
        self.t_list = []
        self.freqs_list = []
        self.t = None
        self.freqs = None

        # load and transform each file
        for wave_file in self.wave_file_list:
            waveform, fs = self.waveform_loader(wave_file)
            if assert_sampling_rate is not None:
                assert fs == assert_sampling_rate, (
                    "%s: sampling rate %r, expected %r"
                    % (wave_file, fs, assert_sampling_rate))

            specgm, freqs, t = self.waveform_transformer.transform(waveform)

            # `is not None` so that truncate=0 is honored as a real cutoff
            if truncate is not None:
                keep = ~(t > truncate)
                t = t[keep]
                specgm = specgm[:, keep]

            self.specgm_list.append(specgm)
            self.t_list.append(t)
            self.freqs_list.append(freqs)

        # Store unique values of t and freqs (if possible). array_equal is
        # shape-aware: stimuli of different lengths compare unequal instead
        # of raising a broadcasting error.
        if self.t_list and all(
                np.array_equal(tt, self.t_list[0]) for tt in self.t_list):
            self.t = self.t_list[0]
        if self.freqs_list and all(
                np.array_equal(ff, self.freqs_list[0])
                for ff in self.freqs_list):
            self.freqs = self.freqs_list[0]

    def load_waveform_from_wave_file(self, filename, dtype=float):
        """Opens wave file and reads, assuming signed shorts.

        filename : path of the wave file
        dtype : dtype of the returned array

        Returns (sig, fs): the samples as a 1d array and the frame rate.
        Raises struct.error if the data is not one signed short per frame.
        """
        wr = wave.Wave_read(filename)
        try:
            fs = wr.getframerate()
            n_frames = wr.getnframes()
            raw = wr.readframes(n_frames)
        finally:
            # always release the file, even if reading fails
            wr.close()

        sig = np.array(struct.unpack('%dh' % n_frames, raw), dtype=dtype)
        return sig, fs

    def _set_list_of_files(self):
        """Reads stimulus and response filenames from disk, if necessary.

        If the attributes are already set, do not reload from disk (so you
        can overload this behavior).

        Unless self.error_check_filenames is false, check that the lists
        are the same length and that paired files carry the same sequence
        of digits, eg [spike003, spike007] and [stim003, stim007].

        Raises AssertionError if a check fails.
        """
        # local import; `re` was previously reached through `glob.re`,
        # which is an implementation detail of the glob module
        import re

        if self.wave_file_list is None:
            # Find sorted list of wave files
            self.wave_file_list = sorted(glob.glob(os.path.join(
                self.stim_dir, self.stim_file_label + '*.wav')))

        if self.spike_file_list is None:
            # Find sorted list of spike files
            self.spike_file_list = sorted(glob.glob(os.path.join(
                self.spike_dir, self.spike_file_label + '*')))

        if not self.error_check_filenames:
            return

        assert len(self.spike_file_list) == len(self.wave_file_list), (
            "found %d spike files but %d wave files"
            % (len(self.spike_file_list), len(self.wave_file_list)))

        for wave_file, spike_file in zip(self.wave_file_list,
                                         self.spike_file_list):
            # Match on the basename only, so that a directory called e.g.
            # 'spike7' cannot be mistaken for the trial number.
            wave_match = re.search(self.stim_file_regex,
                                   os.path.basename(wave_file))
            spike_match = re.search(self.spike_file_regex,
                                    os.path.basename(spike_file))
            assert wave_match is not None, (
                "cannot find trial number in %r" % (wave_file,))
            assert spike_match is not None, (
                "cannot find trial number in %r" % (spike_file,))

            # test string equality (3 != 003)
            assert wave_match.group(1) == spike_match.group(1), (
                "mismatched files: %r vs %r" % (wave_file, spike_file))

    def get_full_stimulus_matrix(self, n_delays, blanking_value=-np.inf):
        """Concatenate and reshape spectrograms for STRF estimation.

        Returned value is one large spectrogram of shape
        (n_freqs*n_delays, total_time). Each column contains the current
        time sample and the n_delays - 1 samples before it.

        Puts (n_freqs, n_delays) blanking_values before each stimulus.
        Then reshapes each column to include delays:
            column_n = padded_specgram[:, n:n-n_delays:-1].flatten()

        The original slice corresponding to column n can be recovered as:
            reshaped_specgram[:, n].reshape(specgram.shape[0], n_delays)

        The blanks are folded into the features, so the total length of
        the returned value is the sum of the lengths of the stimuli. That
        is, the first column of each stimulus contains its first time
        sample and the rest blanks, and the last column contains its last
        n_delays samples.

        Returns None (after printing a hint) if there are no spectrograms.
        """
        specgm_list = getattr(self, 'specgm_list', None)
        if specgm_list is None or len(specgm_list) == 0:
            print("nothing to concatenate, have you run transform_all_stimuli?")
            return

        # put blanks in front of each stimulus and concatenate
        concatenated_specgram_list = []
        for specgram in specgm_list:
            # first prepend blanks
            blanks = blanking_value * np.ones((specgram.shape[0], n_delays))
            padded = np.concatenate([blanks, specgram], axis=1)

            # now include delays in each feature: one flattened
            # (n_freqs, n_delays) window per original time sample,
            # most recent sample first
            windows = [
                padded[:, n:n - n_delays:-1].flatten()
                for n in range(n_delays, padded.shape[1])]
            concatenated_specgram_list.append(np.transpose(np.array(windows)))

        return np.concatenate(concatenated_specgram_list, axis=1)

    def get_concatenated_stimulus_matrix(self):
        """Returns a concatenated (non-reshaped) matrix of stimuli."""
        return np.concatenate(self.specgm_list, axis=1)

    def get_concatenated_response_matrix(self, dtype=float,
                                         sampling_rate=1000., truncate=None):
        """Loads spike files from disk, returns concatenated responses.

        You must run transform_all_stimuli first, or otherwise set
        self.t_list, so that I know how to bin the spikes.

        dtype : dtype of the returned array
        sampling_rate : spike times read from file are divided by this
            before binning (default 1000., presumably ms -> seconds)
        truncate : if a value, throw away all spikes greater than this
            if None, throw away all spikes beyond the end of the stimulus
            for this response

        Each line of a spike file is histogrammed separately into bins
        centered on the stimulus time points, and the histograms are
        averaged over lines. A file with no lines yields all-zero counts.
        The per-file results are also stored in self.psth_list.

        Returns in shape (1, N_timepoints)
        """
        # Set list of filenames and error check
        self._set_list_of_files()

        # load each one and histogram
        concatenated_psths = []
        for respfile, bin_centers in zip(self.spike_file_list, self.t_list):
            with open(respfile) as fi:
                lines = fi.readlines()

            # `is not None` so that truncate=0 is honored as a real cutoff
            if truncate is not None:
                cutoff = truncate
            else:
                cutoff = bin_centers.max()

            # one array of (rescaled, truncated) spike times per line
            spike_times = []
            for line in lines:
                times = myutils.parse_space_sep(line, dtype=float)
                times = np.asarray(times) / sampling_rate
                spike_times.append(times[times <= cutoff])

            # convert bin centers to bin edges; the outer edges are
            # infinite so that no remaining spike falls outside the bins
            bin_edges = bin_centers[:-1] + 0.5 * np.diff(bin_centers)
            bin_edges = np.concatenate([[-np.inf], bin_edges, [np.inf]])

            # now histogram
            if spike_times:
                counts = np.mean(
                    [np.histogram(times, bin_edges)[0]
                     for times in spike_times],
                    axis=0)
            else:
                # no lines at all: treat as a response without spikes
                counts = np.zeros(len(bin_centers))

            # check that size matches up trial-by-trial
            assert len(counts) == len(bin_centers)
            concatenated_psths.append(counts)

        # Return a concatenated array of response from this recording
        self.psth_list = concatenated_psths
        return np.concatenate(concatenated_psths).astype(dtype)[np.newaxis, :]
class STRF_experiment:
    r"""Class that holds links to stimulus and response files.

    Class attributes:
    stim_file_label, spike_file_label : filename prefixes used when
        globbing for stimulus ('<label>*.wav') and spike ('<label>*') files.
    stim_file_regex, spike_file_regex : patterns whose first group is the
        trial number; used to check that stimulus and spike files pair up.
    """
    stim_file_label = 'stim'
    spike_file_label = 'spike'
    stim_file_regex = r'stim(\d+)\.wav'
    spike_file_regex = r'spike(\d+)'

    def __init__(self, stim_dir=None, spike_dir=None,
                 waveform_transformer=None, waveform_loader=None):
        r"""Initialize object to hold stimulus and response data.

        stim_dir : directory holding files of name 'stim(\d+).wav', one
            wave file per trial, sampled at 200KHz.
        spike_dir : directory holding plaintext files of name
            'spike(\d+)', one per trial, consisting of spike times
            separated by spaces in ms all on a single line, aligned to
            time zero the start of the corresponding wave file.
        waveform_transformer : defaults to Spectrogrammer()
        waveform_loader : defaults to self.load_waveform_from_wave_file
            But you can set this to be something else if you do not have
            wave files. It needs to be a function taking a filename as
            argument and return (waveform, fs)
        """
        self.stim_dir = stim_dir
        self.spike_dir = spike_dir
        self.error_check_filenames = True

        # list of files, to be set later automatically or by user
        self.wave_file_list = None
        self.spike_file_list = None

        # Waveform loader and transformer objects
        if waveform_transformer is None:
            # Use a Spectrogrammer with reasonable default parameters
            waveform_transformer = Spectrogrammer()
        self.waveform_transformer = waveform_transformer

        if waveform_loader is None:
            waveform_loader = self.load_waveform_from_wave_file
        self.waveform_loader = waveform_loader

    def transform_all_stimuli(self, assert_sampling_rate=None, truncate=None):
        """Calculates spectrograms of each stimulus.

        First loads using self.waveform_loader. Then transforms using
        self.waveform_transformer. Finally stores the results in
        self.specgm_list, self.t_list, and self.freqs_list (one entry per
        stimulus file). Nothing is returned.

        If the time base is identical for all stimuli, self.t is set to
        that common value; likewise self.freqs for the frequencies.
        Otherwise these are left as None.

        assert_sampling_rate : if not None, assert that I got this
            sampling rate when loading the waveform
        truncate : if not None, drop all data after this time for all
            stimuli (same units as the `t` returned by the transformer;
            presumably seconds)

        Each transformed stimulus is indexed as (n_freqs, n_timepoints).
        """
        # Get list of files to transform
        self._set_list_of_files()

        # where data goes
        self.specgm_list = []
        self.t_list = []
        self.freqs_list = []
        self.t = None
        self.freqs = None

        # load and transform each file
        for wave_file in self.wave_file_list:
            waveform, fs = self.waveform_loader(wave_file)
            if assert_sampling_rate is not None:
                assert fs == assert_sampling_rate, (
                    "%s: sampling rate %r, expected %r"
                    % (wave_file, fs, assert_sampling_rate))

            specgm, freqs, t = self.waveform_transformer.transform(waveform)

            # `is not None` so that truncate=0 is honored as a real cutoff
            if truncate is not None:
                keep = ~(t > truncate)
                t = t[keep]
                specgm = specgm[:, keep]

            self.specgm_list.append(specgm)
            self.t_list.append(t)
            self.freqs_list.append(freqs)

        # Store unique values of t and freqs (if possible). array_equal is
        # shape-aware: stimuli of different lengths compare unequal instead
        # of raising a broadcasting error.
        if self.t_list and all(
                np.array_equal(tt, self.t_list[0]) for tt in self.t_list):
            self.t = self.t_list[0]
        if self.freqs_list and all(
                np.array_equal(ff, self.freqs_list[0])
                for ff in self.freqs_list):
            self.freqs = self.freqs_list[0]

    def load_waveform_from_wave_file(self, filename, dtype=float):
        """Opens wave file and reads, assuming signed shorts.

        filename : path of the wave file
        dtype : dtype of the returned array

        Returns (sig, fs): the samples as a 1d array and the frame rate.
        Raises struct.error if the data is not one signed short per frame.
        """
        wr = wave.Wave_read(filename)
        try:
            fs = wr.getframerate()
            n_frames = wr.getnframes()
            raw = wr.readframes(n_frames)
        finally:
            # always release the file, even if reading fails
            wr.close()

        sig = np.array(struct.unpack('%dh' % n_frames, raw), dtype=dtype)
        return sig, fs

    def _set_list_of_files(self):
        """Reads stimulus and response filenames from disk, if necessary.

        If the attributes are already set, do not reload from disk (so you
        can overload this behavior).

        Unless self.error_check_filenames is false, check that the lists
        are the same length and that paired files carry the same sequence
        of digits, eg [spike003, spike007] and [stim003, stim007].

        Raises AssertionError if a check fails.
        """
        # local import; `re` was previously reached through `glob.re`,
        # which is an implementation detail of the glob module
        import re

        if self.wave_file_list is None:
            # Find sorted list of wave files
            self.wave_file_list = sorted(glob.glob(os.path.join(
                self.stim_dir, self.stim_file_label + '*.wav')))

        if self.spike_file_list is None:
            # Find sorted list of spike files
            self.spike_file_list = sorted(glob.glob(os.path.join(
                self.spike_dir, self.spike_file_label + '*')))

        if not self.error_check_filenames:
            return

        assert len(self.spike_file_list) == len(self.wave_file_list), (
            "found %d spike files but %d wave files"
            % (len(self.spike_file_list), len(self.wave_file_list)))

        for wave_file, spike_file in zip(self.wave_file_list,
                                         self.spike_file_list):
            # Match on the basename only, so that a directory called e.g.
            # 'spike7' cannot be mistaken for the trial number.
            wave_match = re.search(self.stim_file_regex,
                                   os.path.basename(wave_file))
            spike_match = re.search(self.spike_file_regex,
                                    os.path.basename(spike_file))
            assert wave_match is not None, (
                "cannot find trial number in %r" % (wave_file,))
            assert spike_match is not None, (
                "cannot find trial number in %r" % (spike_file,))

            # test string equality (3 != 003)
            assert wave_match.group(1) == spike_match.group(1), (
                "mismatched files: %r vs %r" % (wave_file, spike_file))

    def get_full_stimulus_matrix(self, n_delays, blanking_value=-np.inf):
        """Concatenate and reshape spectrograms for STRF estimation.

        Returned value is one large spectrogram of shape
        (n_freqs*n_delays, total_time). Each column contains the current
        time sample and the n_delays - 1 samples before it.

        Puts (n_freqs, n_delays) blanking_values before each stimulus.
        Then reshapes each column to include delays:
            column_n = padded_specgram[:, n:n-n_delays:-1].flatten()

        The original slice corresponding to column n can be recovered as:
            reshaped_specgram[:, n].reshape(specgram.shape[0], n_delays)

        The blanks are folded into the features, so the total length of
        the returned value is the sum of the lengths of the stimuli. That
        is, the first column of each stimulus contains its first time
        sample and the rest blanks, and the last column contains its last
        n_delays samples.

        Returns None (after printing a hint) if there are no spectrograms.
        """
        specgm_list = getattr(self, 'specgm_list', None)
        if specgm_list is None or len(specgm_list) == 0:
            print("nothing to concatenate, have you run transform_all_stimuli?")
            return

        # put blanks in front of each stimulus and concatenate
        concatenated_specgram_list = []
        for specgram in specgm_list:
            # first prepend blanks
            blanks = blanking_value * np.ones((specgram.shape[0], n_delays))
            padded = np.concatenate([blanks, specgram], axis=1)

            # now include delays in each feature: one flattened
            # (n_freqs, n_delays) window per original time sample,
            # most recent sample first
            windows = [
                padded[:, n:n - n_delays:-1].flatten()
                for n in range(n_delays, padded.shape[1])]
            concatenated_specgram_list.append(np.transpose(np.array(windows)))

        return np.concatenate(concatenated_specgram_list, axis=1)

    def get_concatenated_stimulus_matrix(self):
        """Returns a concatenated (non-reshaped) matrix of stimuli."""
        return np.concatenate(self.specgm_list, axis=1)

    def get_concatenated_response_matrix(self, dtype=float,
                                         sampling_rate=1000., truncate=None):
        """Loads spike files from disk, returns concatenated responses.

        You must run transform_all_stimuli first, or otherwise set
        self.t_list, so that I know how to bin the spikes.

        dtype : dtype of the returned array
        sampling_rate : spike times read from file are divided by this
            before binning (default 1000., presumably ms -> seconds)
        truncate : if a value, throw away all spikes greater than this
            if None, throw away all spikes beyond the end of the stimulus
            for this response

        Each line of a spike file is histogrammed separately into bins
        centered on the stimulus time points, and the histograms are
        averaged over lines. A file with no lines yields all-zero counts.
        The per-file results are also stored in self.psth_list.

        Returns in shape (1, N_timepoints)
        """
        # Set list of filenames and error check
        self._set_list_of_files()

        # load each one and histogram
        concatenated_psths = []
        for respfile, bin_centers in zip(self.spike_file_list, self.t_list):
            with open(respfile) as fi:
                lines = fi.readlines()

            # `is not None` so that truncate=0 is honored as a real cutoff
            if truncate is not None:
                cutoff = truncate
            else:
                cutoff = bin_centers.max()

            # one array of (rescaled, truncated) spike times per line
            spike_times = []
            for line in lines:
                times = myutils.parse_space_sep(line, dtype=float)
                times = np.asarray(times) / sampling_rate
                spike_times.append(times[times <= cutoff])

            # convert bin centers to bin edges; the outer edges are
            # infinite so that no remaining spike falls outside the bins
            bin_edges = bin_centers[:-1] + 0.5 * np.diff(bin_centers)
            bin_edges = np.concatenate([[-np.inf], bin_edges, [np.inf]])

            # now histogram
            if spike_times:
                counts = np.mean(
                    [np.histogram(times, bin_edges)[0]
                     for times in spike_times],
                    axis=0)
            else:
                # no lines at all: treat as a response without spikes
                counts = np.zeros(len(bin_centers))

            # check that size matches up trial-by-trial
            assert len(counts) == len(bin_centers)
            concatenated_psths.append(counts)

        # Return a concatenated array of response from this recording
        self.psth_list = concatenated_psths
        return np.concatenate(concatenated_psths).astype(dtype)[np.newaxis, :]