def load_eegData(fpath, eegChans, bitVolts=0.195):
    '''
    Loads LFP data of the selected channels and scales it with bitVolts.

    Raw continuous data is used whenever NWBio.load_continuous_as_array() returns
    something other than None. Otherwise the tetrode lowpass data of the file is
    loaded and the columns of the tetrodes that the requested channels map to
    (via hfunct.channels_tetrode) are used instead.

    fpath - path handed to the NWBio loading functions
    eegChans - channel number or list of channel numbers to load
    bitVolts - float - factor the loaded samples are multiplied with

    Returns dict with following fields:
        'data' - samples converted to numpy.float32 and then multiplied by bitVolts
        'timestamps' - numpy array of timestamps loaded alongside the data
        'sampling_rate' - sampling rate of the returned data
    '''
    print([hfunct.time_string(), 'DEBUG: load from file'])
    raw = NWBio.load_continuous_as_array(fpath, eegChans)
    if raw is None:
        # If no raw data available, load downsampled data for that tetrode
        lowpass = NWBio.load_tetrode_lowpass(fpath)
        timestamps = np.array(lowpass['tetrode_lowpass_timestamps'])
        # lowpass_downsampling is not defined in this function; it has to be
        # available from the enclosing module scope.
        sampling_rate = NWBio.OpenEphys_SamplingRate() / float(lowpass_downsampling)
        if not isinstance(eegChans, list):
            eegChans = [eegChans]
        tetrodes = [hfunct.channels_tetrode(chan) for chan in eegChans]
        data = np.array(lowpass['tetrode_lowpass'][:, tetrodes]).astype(np.float32)
    else:
        print([hfunct.time_string(), 'DEBUG: extract timestamps'])
        timestamps = np.array(raw['timestamps'])
        print([hfunct.time_string(), 'DEBUG: transforming into float32'])
        data = raw['continuous'].astype(np.float32)
        sampling_rate = NWBio.OpenEphys_SamplingRate()
    # Scale data with bitVolts value to convert from raw data to voltage values
    print([hfunct.time_string(), 'DEBUG: converting to bitVolts'])
    data = data * bitVolts

    return {'data': data, 'timestamps': timestamps, 'sampling_rate': sampling_rate}
def write_set_file(setFileName, new_values_dict):
    '''
    Writes a .set file based on Utils/SetFileBase.set found under package_path.

    For each key in new_values_dict, the first line of the base file that contains
    the bytes of key followed by a space is replaced with 'key value\r\n'.
    Keys without any matching line are left out of the output.

    setFileName - str - path of the .set file to write
    new_values_dict - dict - maps key (str) to new value (str). Values are
                      concatenated with strings, so they must be strings.
    '''
    template_path = os.path.join(package_path, 'Utils', 'SetFileBase.set')
    # Read in base .set file
    with open(template_path, 'rb') as template:
        lines = template.readlines()
    # Correct lines based on new_values_dict
    for key, value in new_values_dict.items():
        token = hfunct.encode_bytes(key + ' ')
        for idx, line in enumerate(lines):
            if token in line:
                lines[idx] = hfunct.encode_bytes(key + ' ' + value + '\r\n')
                # Only the first matching line is replaced
                break
    # Write the .set file with corrected lines
    with open(setFileName, 'wb') as output:
        output.writelines(lines)
def concatenate_spike_data_across_recordings(spike_data, data_time_edges, recording_edges):
    '''
    Concatenates spike data of multiple recordings separately for each tetrode.

    spike_data - list with one element per recording. Each element is a list with
                 one dict per tetrode, with fields 'waveforms', 'clusterIDs' and 'timestamps'.
                 The number of tetrodes is taken from the first recording.
    data_time_edges - list with one element per recording. First value of each element
                      is subtracted from the timestamps of that recording.
    recording_edges - list with one element per recording. First value of each element
                      is added to the timestamps of that recording.

    Returns list with one dict per tetrode, with fields 'waveforms', 'clusterIDs' and
    'timestamps', each concatenated along the first axis across recordings.
    '''
    combined = []
    for n_tet in range(len(spike_data[0])):
        print([hfunct.time_string(), 'DEBUG: concatenating data for tetrode ', n_tet])
        waveforms = [recording[n_tet]['waveforms'] for recording in spike_data]
        clusterIDs = [recording[n_tet]['clusterIDs'] for recording in spike_data]
        # Transform timestamps to continuous recordings
        timestamps = [
            recording[n_tet]['timestamps'] - data_time_edges[n_rec][0] + recording_edges[n_rec][0]
            for n_rec, recording in enumerate(spike_data)
        ]
        print([hfunct.time_string(), 'DEBUG: The concatenation'])
        combined.append({
            'waveforms': np.concatenate(waveforms, axis=0),
            'clusterIDs': np.concatenate(clusterIDs, axis=0),
            'timestamps': np.concatenate(timestamps, axis=0),
        })

    return combined
def concatenate_eegData_across_recordings(eegData, data_time_edges, recording_edges):
    '''
    Crops LFP data of each recording to its data_time_edges and concatenates
    the recordings into one continuous dataset.

    Note that the dicts in eegData are modified in place: their 'timestamps'
    and 'data' fields are replaced with the cropped (and time shifted) arrays.

    eegData - list with one dict per recording, with fields
              'data', 'timestamps' and 'sampling_rate'.
    data_time_edges - list with one element per recording: (start, end) time of the data.
                      Samples with timestamps outside this range are dropped.
    recording_edges - list with one element per recording. First value of each element
                      is the start time of that recording in the concatenated data.

    Returns dict with fields 'data' and 'timestamps' concatenated along first axis
    and 'sampling_rate' as found in the first recording.
    '''
    for n_rec, recording in enumerate(eegData):
        t_start = data_time_edges[n_rec][0]
        t_end = data_time_edges[n_rec][1]
        stamps = recording['timestamps']
        # Crop data outside data_time_edges
        outside = np.logical_or(stamps < t_start, stamps > t_end)
        drop_idx = np.where(outside)[0]
        kept_stamps = np.delete(stamps, drop_idx, axis=0)
        # Transform timestamps to continuous recording
        recording['timestamps'] = kept_stamps - t_start + recording_edges[n_rec][0]
        recording['data'] = np.delete(recording['data'], drop_idx, axis=0)
    # Concatenate data and timestamps
    print([hfunct.time_string(), 'DEBUG: The concatenation'])
    all_data = np.concatenate([recording['data'] for recording in eegData], axis=0)
    all_stamps = np.concatenate([recording['timestamps'] for recording in eegData], axis=0)

    return {'data': all_data,
            'timestamps': all_stamps,
            'sampling_rate': eegData[0]['sampling_rate']}
def createAxonaData_for_NWBfile(OpenEphysDataPath, spike_name='first_available', channel_map=None,
                                subfolder='AxonaData', eegChans=None, pixels_per_metre=None,
                                show_output=False, clustering_name=None):
    '''
    Creates AxonaData for a single recording file, separately for each area in channel_map.

    The output path is subfolder next to the file at OpenEphysDataPath.

    OpenEphysDataPath - str - path of the recording file
    spike_name - str - 'first_available' to use get_first_available_spike_data(),
                 otherwise passed on to NWBio.load_spikes()
    channel_map - dict - for each area a dict with field 'list' of channels.
                  If None, it is loaded from '/General/channel_map/' settings of the file.
    subfolder - str - name of the output folder
    eegChans - list of channels to load LFP data for, or None to skip LFP data
    pixels_per_metre, show_output - passed on to createAxonaData()
    clustering_name - passed on to the spike loading functions

    Raises ValueError if channel_map is None and not available in the file settings.
    '''
    # Construct path for AxonaData and get experiment info
    AxonaDataPath = os.path.join(os.path.dirname(OpenEphysDataPath), subfolder)
    experiment_info = getExperimentInfo(OpenEphysDataPath)
    # Get channel_map for this dataset
    if channel_map is None:
        if not NWBio.check_if_settings_available(OpenEphysDataPath,'/General/channel_map/'):
            raise ValueError('Channel map could not be generated. Enter channels to process.')
        channel_map = NWBio.load_settings(OpenEphysDataPath,'/General/channel_map/')
    # Get position data for this recording
    data_time_edges = NWBio.get_processed_tracking_data_timestamp_edges(OpenEphysDataPath)
    posdata = None
    if NWBio.check_if_processed_position_data_available(OpenEphysDataPath):
        posdata = NWBio.load_processed_tracking_data(OpenEphysDataPath)
    # Create AxonaData separately for each recording area
    for area in channel_map.keys():
        area_channels = channel_map[area]['list']
        # Load spike data
        tetrode_nrs = hfunct.get_tetrode_nrs(area_channels)
        print('Loading spikes for tetrodes nr: ' + ', '.join(map(str, tetrode_nrs)))
        if spike_name == 'first_available':
            spike_data = get_first_available_spike_data(OpenEphysDataPath, tetrode_nrs,
                                                        use_idx_keep=True, use_badChan=True,
                                                        clustering_name=clustering_name)
        else:
            spike_data = NWBio.load_spikes(OpenEphysDataPath, tetrode_nrs=tetrode_nrs,
                                           spike_name=spike_name, use_idx_keep=True,
                                           use_badChan=True, clustering_name=clustering_name)
        # Load eeg data, but only for the requested channels that belong to this area
        eegData = None
        if eegChans is not None:
            eegChansInArea = [chan for chan in eegChans if chan in area_channels]
            if eegChansInArea:
                print('Loading LFP data for channels: ' + ', '.join(map(str, eegChansInArea)))
                eegData = load_eegData(OpenEphysDataPath, eegChansInArea)
        createAxonaData(AxonaDataPath, spike_data, data_time_edges, posdata=posdata,
                        experiment_info=experiment_info, axona_file_name=area,
                        eegData=eegData, pixels_per_metre=pixels_per_metre,
                        show_output=show_output)
def create_DACQ_waveform_data(spike_data, data_time_edges, input_sampling_frequency=30000,
                              output_sampling_frequency=48000, output_timestemps=50):
    '''
    Creates AxonaData waveforms for all tetrodes using multiprocessing.

    Each element of spike_data is processed with
    create_DACQ_waveform_data_for_single_tetrode(), which also describes
    the remaining input arguments.

    spike_data - list of spike_data_tet - see create_DACQ_waveform_data_for_single_tetrode()

    Returns the output of the multiprocessor map over all elements of spike_data.
    '''
    # Arguments that are identical for every tetrode
    common_args = (data_time_edges, input_sampling_frequency,
                   output_sampling_frequency, output_timestemps)
    input_args = [(spike_data_tet,) + common_args for spike_data_tet in spike_data]
    pool = hfunct.multiprocess()

    return pool.map(create_DACQ_waveform_data_for_single_tetrode,
                    len(input_args), args_list=input_args, max_memory_usage=0.66)
def write_file_in_axona_format(filename, header, header_keyorder, data):
    '''
    Writes header and data into a file in axona format.

    The file consists of one 'key value\r\n' line for each key in header_keyorder,
    followed by the data start token, the binary data and the data end token.

    filename - str - path of the file to write
    header - dict - maps header keys to their values as strings
    header_keyorder - list of header keys in the order they are written
    data - object with tofile() method (e.g. numpy array) - written in binary format
    '''
    # Set data start and end tokens
    DATA_START_TOKEN = 'data_start'
    DATA_END_TOKEN = '\r\ndata_end\r\n'
    # Values of keys containing any of these are padded with spaces
    # to 10 characters to replicate the original dacq files
    PADDED_FIELDS = ('num_spikes', 'num_pos_samples', 'duration')
    with open(filename, 'wb') as f:
        # Write header in the correct order
        for key in header_keyorder:
            if any(field in key for field in PADDED_FIELDS):
                stringval = header[key]
                # Negative padding length yields no padding for longer values
                stringval = stringval + ' ' * (10 - len(stringval))
            else:
                stringval = header[key]
            f.write(hfunct.encode_bytes(key + ' ' + stringval + '\r\n'))
        # Write the start token string
        f.write(hfunct.encode_bytes(DATA_START_TOKEN))
        # Write the data into the file in binary format
        data.tofile(f)
        # Write the end token string
        f.write(hfunct.encode_bytes(DATA_END_TOKEN))
def createAxonaData_for_multiple_NWBfiles(OpenEphysDataPaths, AxonaDataPath, spike_name='first_available',
                                          channel_map=None, eegChans=None, pixels_per_metre=None,
                                          show_output=False, clustering_name=None):
    '''
    Creates AxonaData from multiple recording files concatenated into one
    continuous dataset, separately for each area in channel_map.

    The recordings are laid out back to back in the order of OpenEphysDataPaths,
    each with the duration given by its processed tracking data timestamp edges.
    After processing, a text file 'recording_edges' is written into AxonaDataPath,
    listing the start and end time of each recording in the concatenated data.

    OpenEphysDataPaths - list of str - paths of the recording files
    AxonaDataPath - str - path of the output folder
    spike_name - str - 'first_available' to use get_first_available_spike_data(),
                 otherwise passed on to NWBio.load_spikes()
    channel_map - dict - for each area a dict with field 'list' of channels.
                  If None, it is loaded from '/General/channel_map/' settings of the first file.
    eegChans - list of channels to load LFP data for, or None to skip LFP data
    pixels_per_metre, show_output - passed on to createAxonaData()
    clustering_name - passed on to the spike loading functions. This is passed on
                      regardless of spike_name, as for a single recording file.

    Raises ValueError if channel_map is None and not available in the settings of the first file.
    '''
    # Get experiment info
    if len(OpenEphysDataPaths) > 1:
        print('Using experiment_info from first recording only.')
    experiment_info = getExperimentInfo(OpenEphysDataPaths[0])
    # Get channel_map for this dataset
    if channel_map is None:
        if len(OpenEphysDataPaths) > 1:
            print('Using channel_map from first recording only.')
        if NWBio.check_if_settings_available(OpenEphysDataPaths[0],'/General/channel_map/'):
            channel_map = NWBio.load_settings(OpenEphysDataPaths[0],'/General/channel_map/')
        else:
            raise ValueError('Channel map could not be generated. Enter channels to process.')
    # Compute start and end times of each segment of the recording
    data_time_edges = [NWBio.get_processed_tracking_data_timestamp_edges(OpenEphysDataPath)
                       for OpenEphysDataPath in OpenEphysDataPaths]
    recording_edges = []
    recording_duration = 0
    for dte in data_time_edges:
        # Each recording starts where the previous one ended
        end_of_this_recording = recording_duration + (dte[1] - dte[0])
        recording_edges.append([recording_duration, end_of_this_recording])
        recording_duration = end_of_this_recording
    combined_data_time_edges = [recording_edges[0][0], recording_edges[-1][1]]
    # Get position data for these recordings
    print('Loading position data.')
    posdata = []
    for OpenEphysDataPath in OpenEphysDataPaths:
        if NWBio.check_if_processed_position_data_available(OpenEphysDataPath):
            posdata.append(NWBio.load_processed_tracking_data(OpenEphysDataPath))
        else:
            posdata.append(None)
    # Position data is only used if it is available for all recordings
    if any(x is None for x in posdata):
        posdata = None
    else:
        posdata = concatenate_posdata_across_recordings(posdata, data_time_edges, recording_edges)
    # Create AxonaData separately for each recording area
    for area in channel_map.keys():
        # Load spike data
        tetrode_nrs = hfunct.get_tetrode_nrs(channel_map[area]['list'])
        print('Loading spikes for tetrodes nr: ' + ', '.join(map(str, tetrode_nrs)))
        spike_data = []
        for OpenEphysDataPath in OpenEphysDataPaths:
            if spike_name == 'first_available':
                # clustering_name is passed on here as well, so that the argument
                # is not silently ignored for the default spike_name
                spike_data.append(get_first_available_spike_data(OpenEphysDataPath, tetrode_nrs,
                                                                 use_idx_keep=True, use_badChan=True,
                                                                 clustering_name=clustering_name))
            else:
                print([hfunct.time_string(), 'DEBUG: loading spikes of tet ', tetrode_nrs, ' from ', OpenEphysDataPath])
                spike_data.append(NWBio.load_spikes(OpenEphysDataPath, tetrode_nrs=tetrode_nrs,
                                                    spike_name=spike_name, use_idx_keep=True,
                                                    use_badChan=True, clustering_name=clustering_name))
        spike_data = concatenate_spike_data_across_recordings(spike_data, data_time_edges, recording_edges)
        # Load eeg data
        if eegChans is None:
            eegData = None
        else:
            eegChansInArea_Bool = [x in channel_map[area]['list'] for x in eegChans]
            if any(eegChansInArea_Bool):
                eegChansInArea = [x for (x, y) in zip(eegChans, eegChansInArea_Bool) if y]
                print('Loading LFP data for channels: ' + ', '.join(map(str, eegChansInArea)))
                eegData = []
                for OpenEphysDataPath in OpenEphysDataPaths:
                    print([hfunct.time_string(), 'DEBUG: loading eegData for ', OpenEphysDataPath])
                    eegData.append(load_eegData(OpenEphysDataPath, eegChansInArea))
                print([hfunct.time_string(), 'DEBUG: concatenating eeg data'])
                eegData = concatenate_eegData_across_recordings(eegData, data_time_edges, recording_edges)
            else:
                eegData = None
        createAxonaData(AxonaDataPath, spike_data, combined_data_time_edges,
                        posdata=posdata, experiment_info=experiment_info,
                        axona_file_name=area, eegData=eegData,
                        pixels_per_metre=pixels_per_metre, show_output=show_output)
    # Keep a record of where each recording is positioned in the concatenated data
    with open(os.path.join(AxonaDataPath, 'recording_edges'), 'w') as edges_file:
        for edges, OpenEphysDataPath in zip(recording_edges, OpenEphysDataPaths):
            edges_file.write(str(edges) + ' path: ' + OpenEphysDataPath + '\n')
def write_clusterIDs_in_CLU_format(clusterIDs, cluFileName):
    '''
    Writes cluster IDs into a file, one value per line with '\r\n' line endings.

    The first line of the file is the largest value in clusterIDs,
    followed by each element of clusterIDs in the original order.

    clusterIDs - non-empty sequence of cluster IDs, one for each spike
    cluFileName - str - path of the file to write
    '''
    header_line = hfunct.encode_bytes(str(max(clusterIDs)) + '\r\n')
    id_lines = [hfunct.encode_bytes(str(nclu) + '\r\n') for nclu in list(clusterIDs)]
    with open(cluFileName, 'wb') as clu_file:
        clu_file.writelines([header_line] + id_lines)
def create_DACQ_eeg_or_egf_data(eeg_or_egf, data, data_time_edges, target_range=1000):
    '''
    EEG is lowpass filtered to half the target sampling rate, cropped to data_time_edges,
    resampled to the target sampling rate and inverted to same polarity as spikes
    in AxonaFormat. The data is then mean centered, scaled such that target_range
    corresponds to 127 and clipped to the range from -127 to 127.
    The returned arrays are in correct format to be written to binary Axona file.

    eeg_or_egf - str - 'eeg' or 'egf' specifies output data format and sampling rate
    data - dict with following fields:
        'data' - numpy array in dtype=numpy.float32 with dimensions (N x n_chan) - LFP data to convert.
        'timestamps' - numpy one dimensional array in dtype=numpy.float32 - 
                       timestamps in seconds for each of the datapoints in data
        'sampling_rate' - int or float - sampling rate of data
    data_time_edges - tuple with two elements: start and end time of data (in seconds).
                      This is used with timestamps to crop EEG data outside data range.
    target_range - int or float - EEG data with voltage values above this will be clipped.

    Returns list with one numpy structured array per channel, each with a single field 'eeg'.
    The field is a signed byte ('=b') for 'eeg' and numpy.int16 for 'egf'.

    Raises ValueError if eeg_or_egf is not 'eeg' or 'egf', if the sampling rate of
    input data is lower than the output sampling rate or if the input data is not numpy.float32.
    '''
    # AxonaData eeg data parameters.
    # The format is validated before anything else, so that an unsupported value
    # is reported clearly instead of failing on an undefined variable later on.
    if eeg_or_egf == 'eeg':
        output_SamplingRate = AxonaDataEEG_SamplingRate()
        dacq_eeg_data_dtype = '=b'
    elif eeg_or_egf == 'egf':
        output_SamplingRate = AxonaDataEGF_SamplingRate()
        # NOTE(review): egf samples are stored as int16, but they are scaled to the
        # same -127 to 127 range as eeg samples below - confirm this is intended.
        dacq_eeg_data_dtype = np.int16
    else:
        raise ValueError("eeg_or_egf must be 'eeg' or 'egf', but got " + repr(eeg_or_egf))
    dacq_eeg_dtype = [('eeg', dacq_eeg_data_dtype)]
    if output_SamplingRate > data['sampling_rate']:
        raise ValueError('Input data sampling rate is lower than requested output data.')
    if data['data'].dtype != np.float32:
        raise ValueError('Input data dtype is not numpy.float32.')
    lowpass_frequency = output_SamplingRate / 2.0
    # Find datapoints outside data_time_edges. These are removed after filtering.
    idx_outside_data_time = data['timestamps'] < data_time_edges[0]
    idx_outside_data_time = np.logical_or(idx_outside_data_time,
                                          data['timestamps'] > data_time_edges[1])
    idx_outside_data_time = np.where(idx_outside_data_time)[0]
    cropped_timestamps = np.delete(data['timestamps'].copy(), idx_outside_data_time, 0)
    # Timestamps for resampling, relative to the first datapoint inside data_time_edges
    original_timestamps = (cropped_timestamps - cropped_timestamps[0])
    target_timestamps = np.arange(0, original_timestamps[-1], 1.0 / float(output_SamplingRate))
    eeg_data_dacq = []
    for n_chan in range(data['data'].shape[1]):
        # Filter data with lowpass butter filter
        signal = hfunct.butter_lowpass_filter(data['data'][:, n_chan].copy(),
                                              sampling_rate=float(data['sampling_rate']),
                                              lowpass_frequency=lowpass_frequency,
                                              filt_order=4)
        # Crop data outside data_time_edges
        signal = np.delete(signal, idx_outside_data_time, 0)
        # Resample data to dacq_eeg sampling rate
        interfunct = interpolate.interp1d(original_timestamps, signal)
        signal = interfunct(target_timestamps)
        # Invert data
        signal = -signal
        # Adjust EEG data format and range
        signal = signal - np.mean(signal)
        signal = signal / target_range
        signal = signal * 127
        signal = np.clip(signal, -127, 127)
        # Create DACQ data eeg format
        dacq_eeg = signal.astype(dtype=dacq_eeg_data_dtype)
        channel_data = np.zeros(dacq_eeg.size, dtype=dacq_eeg_dtype)
        channel_data['eeg'] = dacq_eeg
        eeg_data_dacq.append(channel_data)

    return eeg_data_dacq