def test_compresed_uncompressed_channels_match(dataset, version):
    """Check that compressed and uncompressed files hold identical raw data.

    Reads the file with the ``''`` suffix (uncompressed) and the one with the
    ``'-c'`` suffix (compressed) for the given ``dataset``/``version`` and
    compares ``raw_data`` channel by channel.
    """
    uchan = bioread.read(data_file_name(dataset, version, '')).channels
    cchan = bioread.read(data_file_name(dataset, version, '-c')).channels
    for i, (uch, cch) in enumerate(zip(uchan, cchan)):
        # No trailing comma after the call: a one-element tuple is always
        # truthy and would make the assertion below pass unconditionally.
        result = np.array_equal(uch.raw_data, cch.raw_data)
        assert result, 'Mismatch for {0}, {1} channel {2}'.format(
            dataset, version, i)
def test_text_journals_match(test_file, canon_data):
    """The text journal read from ``test_file`` equals the canonical journal.

    Both journals go through ``normalize_line_endings`` before comparison.
    """
    loaded = bioread.read(test_file)
    actual, expected = (
        normalize_line_endings(source.journal)
        for source in (loaded, canon_data)
    )
    assert actual == expected
def test_html_journals_match(test_file, canon_data):
    """The HTML journal of ``test_file`` yields the same extracted content.

    Each journal is fed through its own ``DataExtractor`` and the resulting
    ``content`` attributes are compared.
    """
    parsers = {'canon': DataExtractor(), 'test': DataExtractor()}
    journals = {
        'canon': canon_data.journal,
        'test': bioread.read(test_file).journal,
    }
    for key in ('canon', 'test'):
        parsers[key].feed(journals[key])
    assert parsers['canon'].content == parsers['test'].content
def test_full_pattern_channels_match(test_file, canon_data):
    """Compare raw channel data of ``test_file`` against the canonical data.

    Only the region selected by the per-channel slice returned from
    ``full_pattern_slices`` is compared.
    """
    loaded = bioread.read(test_file)
    regions = full_pattern_slices(canon_data.channels)
    triples = zip(canon_data.channels, loaded.channels, regions)
    for idx, (canon_ch, loaded_ch, region) in enumerate(triples):
        same = np.array_equal(canon_ch.raw_data[region],
                              loaded_ch.raw_data[region])
        assert same, '{0} channel {1} does not match'.format(test_file, idx)
def run(self):
    """Parse the command line and write the file's channels as text.

    Output goes to the path given by ``--outfile`` when present, otherwise
    to standard output.
    """
    pargs = docopt(__doc__, self.argv, version=meta.version_description)
    infile = pargs['<acq_file>']

    # Optional comma-separated list of channel indexes to restrict output to.
    raw_indexes = pargs['--channel-indexes']
    channel_indexes = (
        [int(i) for i in raw_indexes.split(',')] if raw_indexes else None)

    data = bioread.read(infile, channel_indexes=channel_indexes)
    mval = pargs['--missing-as']
    outfile = pargs['--outfile']
    if not outfile:
        txtwriter.write_text(data, sys.stdout, channel_indexes, mval)
        return
    with open(outfile, 'w') as f:
        txtwriter.write_text(data, f, channel_indexes, mval)
def run(self):
    """Convert the requested AcqKnowledge file to text.

    Command-line options are parsed with docopt; the text is written to
    ``--outfile`` if one was supplied and to ``sys.stdout`` otherwise.
    """
    pargs = docopt(__doc__, self.argv, version=version.description)
    infile = pargs['<acq_file>']

    channel_indexes = None
    index_option = pargs['--channel-indexes']
    if index_option:
        channel_indexes = list(map(int, index_option.split(',')))

    data = bioread.read(infile, channel_indexes=channel_indexes)
    mval = pargs['--missing-as']
    target = pargs['--outfile']
    if target:
        with open(target, 'w') as f:
            txtwriter.write_text(data, f, channel_indexes, mval)
    else:
        txtwriter.write_text(data, sys.stdout, channel_indexes, mval)
def acq_to_txt(physio_input_file):
    """Write a text version of an .acq file next to it.

    The output name is the input name with its last four characters replaced
    by ``.txt``. Channels ``[0, 1, 9]`` are written.

    Returns the text file name, or ``None`` if anything raised (the failure
    is logged rather than propagated).
    """
    this_code = 'acq_to_txt()'
    try:
        # Output name: drop the trailing 4 characters and append '.txt'
        txt_file = physio_input_file[:-4] + '.txt'
        data = bioread.read(physio_input_file)
        logging.info(
            'Converting physio acq file to text file: {}'.format(txt_file))
        with open(txt_file, 'w') as fid:
            txtwriter.write_text(data, fid, [0, 1, 9], None)
    except Exception as err:
        failure = 'acq to txt conversion failed! -- {}.{}'.format(
            this_script, this_code)
        logging.error(failure)
        logging.error('{}'.format(err))
        return None
    else:
        return txt_file
def kpeTaskDat(filename):
    """Return a DataFrame of onsets and durations read from an .acq file.

    The caller still needs to attach condition and subject number.

    The "Ready Screen" channel provides the reference point: the first value
    returned by ``lookZero`` for it, minus 6, is passed as the offset when
    processing the "Script" channel.
    """
    import pandas as pd

    recording = bioread.read(filename)

    # Take the first ready screen
    ready_trace = recording.named_channels["Ready Screen"].raw_data
    ready_onsets = lookZero(ready_trace, 0)[0]
    # Difference between first appearance and TRs
    # (first Ready screen set at 6 seconds).
    offset = ready_onsets[0] - 6

    # Choose Script channel by its name
    script_trace = recording.named_channels["Script"].raw_data
    script_times = lookZero(script_trace, offset)

    # duration = second sequence minus first sequence, element by element
    durations = [
        script_times[1][i] - script_times[0][i]
        for i in range(len(script_times[0]))
    ]
    return pd.DataFrame({'onset': script_times[0], 'duration': durations})
def test_read_iso_8859_1():
    """A file read with ``encoding='iso_8859_1'`` loads and has 4 channels."""
    acq_file = path.join(DATA_PATH, "misc", "iso_8859_1.acq")
    # The read itself raises if the file cannot be handled.
    channels = bioread.read(acq_file, encoding='iso_8859_1').channels
    assert len(channels) == 4
def acq2bids(physio_acq_files, trigger_labels=['trigger', 'digital input']):
    """Read physiological data from AcqKnowledge files into a PhysioData.

    Parameters
    ----------
    physio_acq_files : list of str
        Paths of the original physio files. A single string is accepted and
        treated as a one-element list.
    trigger_labels : list of str
        Labels of the channel that carries the scanner trigger. One word from
        the channel name is enough (matching is case-insensitive substring).
        A non-list value is wrapped in a list.

    Returns
    -------
    physio : PhysioData
        PhysioData with one PhysioSignal per channel of every file.
    """
    # Normalise both arguments to lists. The default list is only rebound,
    # never mutated, inside this function.
    if isinstance(physio_acq_files, str):
        physio_acq_files = [physio_acq_files]
    if not isinstance(trigger_labels, list):
        trigger_labels = [trigger_labels]

    physio = PhysioData()
    trigger_channel = ''

    for physio_acq in physio_acq_files:
        physio_data = bioread.read(physio_acq)
        # Time the file was created
        physiostarttime = physio_data.earliest_marker_created_at

        for item in physio_data.channels:
            lowered_name = item.name.lower()
            if 'puls' in lowered_name:
                physio_label = 'cardiac'
            elif 'resp' in lowered_name:
                physio_label = 'respiratory'
            elif any([tl.lower() in lowered_name for tl in trigger_labels]):
                physio_label = 'trigger'
                trigger_channel = item.name
            else:
                physio_label = item.name

            # Channels whose resulting label is empty are skipped.
            if not physio_label:
                continue
            new_signal = PhysioSignal(
                label=physio_label,
                samples_per_second=item.samples_per_second,
                sampling_times=item.time_index,
                physiostarttime=physiostarttime.timestamp(),
                signal=item.data,
                units=item.units)
            physio.append_signal(new_signal)

    # The "neuralstarttime" is derived from the first trigger, once every
    # signal has been read. Without a trigger channel, each signal falls back
    # to its own physiostarttime, i.e. the physiological recording is assumed
    # to have started together with the neural recording.
    # The matched channel is assumed to hold the scanner trigger and not,
    # e.g., a stimulus trigger -- hence the warning.
    neuralstarttime = ''
    if trigger_channel:
        print(
            'Warning: Assuming "{}" channel corresponds to the scanner trigger'
            .format(trigger_channel))
        # sampling_times are relative to the start of the recording, so the
        # file creation time is added on top of the scanner onset.
        scanner_onset = physio.get_scanner_onset()
        neuralstarttime = scanner_onset + physiostarttime.timestamp()

    for p_signal in physio.signals:
        p_signal.neuralstarttime = neuralstarttime or p_signal.physiostarttime
        # Fill with NaNs the places for which data is missing
        p_signal.plug_missing_data()

    return physio
def search(array, term):
    """Return the elements of ``array`` that contain ``term``."""
    return [element for element in array if term in element]


# Convert every .acq file in ``acq_path`` to a .csv file and store it in a
# CSV sub folder to be found later.
for f in os.listdir(acq_path):
    if os.path.isfile(acq_path + '/' + f):  # ensuring we only work on files
        if f[-4:] == ".acq":  # ensuring we only work on .acq files
            print("Opening {}".format(acq_path + '/' + f))
            acquisition = bioread.read(acq_path + '/' + f)  # read the .acq file
            participant = re.search(regex, f).group(0)  # just participant number
            filename = f[:-4]  # filename without file extension
            print("Participant #{}, Filename {}".format(participant, filename))
            print("Writing to {}".format(filename + '.csv'))
            # Close the file before moving it so that all buffered output is
            # flushed to disk and no handle is leaked per processed file.
            with open(filename + '.csv', 'w') as output:
                write_text(acquisition, output, None, None)
            print("Done")
            print("")
            if not os.path.exists(acq_path + "/CSV"):
                os.makedirs(acq_path + "/CSV")
            move(filename + '.csv', acq_path + '/CSV/' + filename + '.csv')
def read_acqknowledge(filename, sampling_rate="max", resample_method="interpolation", impute_missing=True):
    """Read and format a BIOPAC's AcqKnowledge file into a pandas' dataframe.

    The function outputs both the dataframe and the sampling rate (retrieved
    from the AcqKnowledge file).

    Parameters
    ----------
    filename : str
        Filename (with or without the extension) of a BIOPAC's AcqKnowledge
        file (e.g., 'data.acq').
    sampling_rate : int
        Sampling rate (in Hz, i.e., samples/second). Since an AcqKnowledge
        file can contain signals recorded at different rates, harmonization is
        necessary in order to convert it to a DataFrame. Thus, if
        `sampling_rate` is set to 'max' (default), will keep the maximum
        recorded sampling rate and upsample the channels with lower rate if
        necessary (using the `signal_resample()` function). If the sampling
        rate is set to a given value, will resample the signals to the desired
        value. Note that the value of the sampling rate is outputted along
        with the data.
    resample_method : str
        Method of resampling (see `signal_resample()`).
    impute_missing : bool
        Sometimes, due to connections issues, the signal has some holes (short
        periods without signal). If 'impute_missing' is True, will
        automatically fill the signal interruptions using padding.

    Returns
    ----------
    df : DataFrame
        The AcqKnowledge file as a pandas dataframe.
    sampling rate: int
        The sampling rate at which the data is sampled.

    Raises
    ------
    ImportError
        If the 'bioread' module is not installed.
    ValueError
        If the file cannot be found.

    See Also
    --------
    signal_resample

    Example
    ----------
    >>> import neurokit2 as nk
    >>>
    >>> data, sampling_rate = nk.read_acqknowledge('file.acq') #doctest: +SKIP

    """
    # Try loading bioread
    try:
        import bioread
    except ImportError as err:
        # One single message string: several positional arguments would make
        # the exception render as a tuple instead of readable text.
        raise ImportError(
            "NeuroKit error: read_acqknowledge(): the 'bioread' module is required"
            " for this function to run. "
            "Please install it first (`pip install bioread`)."
        ) from err

    # Check filename
    if ".acq" not in filename:
        filename += ".acq"

    if os.path.exists(filename) is False:
        raise ValueError("NeuroKit error: read_acqknowledge(): couldn't"
                         " find the following file: " + filename)

    # Read file
    file = bioread.read(filename)
    channels = file.named_channels

    # Get desired frequency
    if sampling_rate == "max":
        sampling_rate = np.max(
            [channels[name].samples_per_second for name in channels])

    # Loop through channels
    data = {}
    for name in channels:
        channel = channels[name]
        signal = np.array(channel.data)

        # Fill signal interruptions (forward fill; `fillna(method="pad")` is
        # deprecated in recent pandas releases)
        if impute_missing is True and np.isnan(np.sum(signal)):
            signal = pd.Series(signal).ffill().values

        # Resample if necessary
        if channel.samples_per_second != sampling_rate:
            signal = signal_resample(
                signal,
                sampling_rate=channel.samples_per_second,
                desired_sampling_rate=sampling_rate,
                method=resample_method,
            )
        data[name] = signal

    # Sanitize lengths: resampling may leave channels with slightly different
    # lengths, which a DataFrame cannot hold.
    lengths = [len(values) for values in data.values()]
    if len(set(lengths)) > 1:  # If different lengths
        length = pd.Series(lengths).mode()[0]  # Find most common (target length)
        for name in data:
            if len(data[name]) > length:
                data[name] = data[name][0:length]
            elif len(data[name]) < length:
                # Pad by repeating the last sample
                data[name] = np.concatenate([
                    data[name],
                    np.full((length - len(data[name])), data[name][-1]),
                ])

    # Final dataframe
    df = pd.DataFrame(data)
    return df, sampling_rate
def read_acqknowledge(filename, path="", index="datetime", sampling_rate=1000, resampling_method="pad", fill_interruptions=True):
    """
    Read and Format a BIOPAC's AcqKnowledge file into a pandas' dataframe.

    Parameters
    ----------
    filename : str
        Filename (with or without the extension) of a BIOPAC's AcqKnowledge file.
    path : str
        Data directory.
    index : str
        How to index the dataframe. "datetime" for approximate datetime (based
        on the file creation/change) and "range" for a simple range index.
    sampling_rate : int
        Final sampling rate (samples/second).
    resampling_method : str
        The resampling method: "mean", "pad" or "bfill". Any other value
        leaves the data un-resampled.
    fill_interruptions : bool
        Automatically fill the eventual signal interruptions using a backfill
        method.

    Returns
    ----------
    df : pandas.DataFrame()
        the acqknowledge file converted to a dataframe. An empty tuple is
        returned (after printing an error) when the file cannot be found.

    Example
    ----------
    >>> import neurokit as nk
    >>>
    >>> df = nk.read_acqknowledge('file.acq')

    Notes
    ----------
    *Authors*

    - Dominique Makowski (https://github.com/DominiqueMakowski)

    *Dependencies*

    - pandas
    - bioread
    - datetime

    *See Also*

    - bioread package: https://github.com/njvack/bioread
    """
    # Check path
    file = path + filename
    if ".acq" not in file:
        file += ".acq"
    if os.path.exists(file) is False:
        print(
            "NeuroKit Error: read_acqknowledge(): couldn't find the following file: "
            + filename)
        return ()

    # Convert creation date
    creation_date = get_creation_date(file)
    creation_date = datetime.datetime.fromtimestamp(creation_date)

    # Read file
    file = bioread.read(file)
    channels = file.named_channels

    # Get the channel frequencies
    freq_list = [channels[name].samples_per_second for name in channels]
    max_freq = max(freq_list) if freq_list else None

    # Get data with max frequency and the others
    data = {}
    data_else = {}
    for name in channels:
        if channels[name].samples_per_second == max_freq:
            data[name] = channels[name].data
        else:
            data_else[name] = channels[name].data

    # Create index: the recording is assumed to end at the creation date, so
    # it begins `max(time_index)` seconds earlier.
    recording_length = max(file.time_index)
    beginning_date = creation_date - datetime.timedelta(0, recording_length)
    timestamps = [
        beginning_date + datetime.timedelta(0, offset)
        for offset in file.time_index
    ]
    df = pd.DataFrame(data, index=timestamps)

    # Create resampling factor ("ms" is the non-deprecated spelling of the
    # millisecond alias "L"); max frequency must be 1000
    resampling_rule = str(int(1000 / sampling_rate)) + "ms"

    if data_else:  # if not empty
        for name in data_else:
            channel_frequency = channels[name].samples_per_second
            serie = data_else[name]
            # Use a dedicated local name here: re-binding `index` would
            # clobber the `index` parameter tested further down.
            offsets = list(
                np.arange(0, recording_length, 1 / channel_frequency))
            offsets = offsets[:len(serie)]
            channel_times = [
                beginning_date + datetime.timedelta(0, offset)
                for offset in offsets
            ]
            data_else[name] = pd.Series(serie, index=channel_times)
        df2 = pd.DataFrame(data_else)

    # Resample ("pad" maps to the resampler's forward fill)
    aggregations = {"mean": "mean", "pad": "ffill", "bfill": "bfill"}
    if resampling_method in aggregations:
        aggregate = aggregations[resampling_method]
        if data_else:
            df2 = getattr(df2.resample(resampling_rule), aggregate)()
        df = getattr(df.resample(resampling_rule), aggregate)()

    if data_else:
        df = pd.concat([df, df2], axis=1)

    if index == "range":
        df = df.reset_index()

    # Fill signal interruptions
    if fill_interruptions is True:
        df = df.bfill()

    return (df)
writetojson('task-fearcond_events.json', bidsout, events_dict) ######################################################################### # Physio data BIOPAC -> BIDS ######################################################################### for sub in suball: outpath = opj(bidsout, 'sub-' + sub) derivout = opj(derivativesout, 'sub-' + sub) if not os.path.exists(derivout): os.mkdir(derivout) os.mkdir(opj(derivout, 'scr')) data = bioread.read(opj(rawpath, 'physio', 'S' + sub + '_testing.acq')) # Get shock trigger onsets in SCR scrdat = data.channels[0].data emgdat = data.channels[1].data rmsemgdat = data.channels[6].data trigdat = data.channels[5].data # Get # samples /second sec = data.channels[0].samples_per_second rmsemgsec = data.channels[6].samples_per_second trigsec = data.channels[5].samples_per_second emgsec = data.channels[1].samples_per_second # Correct split files for part 25 that were apparently recorded with # a different sampling rate
#!/usr/bin/env python # Example taken from the README file. import bioread data = bioread.read('myfile.acq') data.graph_header.file_revision len(data.channels) data.channels[1].samples_per_second len(data.channels[1].data) len(data.channels[1].upsampled_data) data.channels[0].samples_per_second len(data.channels[0].data) data.channels[0].data[0] data.channels[0].raw_data[0] data.channels[0].name # let's assume the output is "CO2" data.named_channels['CO2'].data[0] from bioread.writers import MatlabWriter MatlabWriter.write_file(data, "myfile.mat")
def read_acqknowledge(filename, path="", index="datetime", sampling_rate="max", resampling_method="pad", fill_interruptions=True, return_sampling_rate=True):
    """
    Read and Format a BIOPAC's AcqKnowledge file into a pandas' dataframe.

    Parameters
    ----------
    filename : str
        Filename (with or without the extension) of a BIOPAC's AcqKnowledge file.
    path : str
        Data directory.
    index : str
        How to index the dataframe. "datetime" for approximate datetime (based
        on the file creation/change) and "range" for a simple range index.
    sampling_rate : int or "max"
        Final sampling rate (samples/second). "max" (default) keeps the
        highest rate found among the channels.
    resampling_method : str
        The resampling method: "mean", "pad" or "bfill". Any other value
        prints a warning and falls back to "pad".
    fill_interruptions : bool
        Automatically fill the eventual signal interruptions using a backfill
        method.
    return_sampling_rate : bool
        Should it return the sampling rate in a tuple with the dataframe?
        Defaults to True.

    Returns
    ----------
    df, sampling_rate : pandas.DataFrame(), int
        The AcqKnowledge file converted to a dataframe and its sampling_rate.
        Only the dataframe is returned when `return_sampling_rate` is False.
        An empty tuple is returned (after printing an error) when the file
        cannot be found.

    Example
    ----------
    >>> import neurokit as nk
    >>>
    >>> df, sampling_rate = nk.read_acqknowledge('file.acq', return_sampling_rate=True)

    Notes
    ----------
    *Authors*

    - `Dominique Makowski <https://dominiquemakowski.github.io/>`_

    *Dependencies*

    - pandas
    - bioread
    - datetime

    *See Also*

    - bioread package: https://github.com/njvack/bioread
    """
    # Check path
    file = path + filename
    if ".acq" not in file:
        file += ".acq"
    if os.path.exists(file) is False:
        print(
            "NeuroKit Error: read_acqknowledge(): couldn't find the following file: "
            + filename)
        return ()

    # Convert creation date
    creation_date = find_creation_date(file)
    creation_date = datetime.datetime.fromtimestamp(creation_date)

    # Read file
    file = bioread.read(file)
    channels = file.named_channels

    # Get the channel frequencies
    freq_list = [channels[name].samples_per_second for name in channels]
    max_freq = max(freq_list) if freq_list else None

    # Get data with max frequency and the others
    data = {}
    data_else = {}
    for name in channels:
        if channels[name].samples_per_second == max_freq:
            data[name] = channels[name].data
        else:
            data_else[name] = channels[name].data

    # Create index: the recording is assumed to end at the creation date, so
    # it begins `max(time_index)` seconds earlier.
    recording_length = max(file.time_index)
    beginning_date = creation_date - datetime.timedelta(0, recording_length)
    timestamps = [
        beginning_date + datetime.timedelta(0, offset)
        for offset in file.time_index
    ]
    df = pd.DataFrame(data, index=timestamps)

    # max frequency must be 1000
    if len(data_else) > 0:  # if not empty
        for name in data_else:
            channel_frequency = channels[name].samples_per_second
            serie = data_else[name]
            # Use a dedicated local name here: re-binding `index` would
            # clobber the `index` parameter tested further down.
            offsets = list(
                np.arange(0, recording_length, 1 / channel_frequency))
            offsets = offsets[:len(serie)]
            channel_times = [
                beginning_date + datetime.timedelta(0, offset)
                for offset in offsets
            ]
            data_else[name] = pd.Series(serie, index=channel_times)
        df2 = pd.DataFrame(data_else)

    # Create resampling factor ("ms" is the non-deprecated spelling of the
    # millisecond alias "L")
    if sampling_rate == "max":
        sampling_rate = max(freq_list)

    try:
        resampling_factor = str(int(1000 / sampling_rate)) + "ms"
    except TypeError:
        print(
            "NeuroKit Warning: read_acqknowledge(): sampling_rate must be either num or 'max'. Setting to 'max'."
        )
        sampling_rate = max(freq_list)
        resampling_factor = str(int(1000 / sampling_rate)) + "ms"

    # Resample
    if resampling_method not in ["mean", "bfill", "pad"]:
        print(
            "NeuroKit Warning: read_acqknowledge(): resampling_method must be 'mean', 'bfill' or 'pad'. Setting to 'pad'."
        )
        resampling_method = 'pad'

    # "pad" maps to the resampler's forward fill
    aggregate = {"mean": "mean", "bfill": "bfill", "pad": "ffill"}[resampling_method]
    if len(data_else) > 0:
        df2 = getattr(df2.resample(resampling_factor), aggregate)()
    # The full-rate channels only need resampling when the target rate
    # differs from their own.
    if int(sampling_rate) != int(max(freq_list)):
        df = getattr(df.resample(resampling_factor), aggregate)()

    # Join dataframes
    if len(data_else) > 0:
        df = pd.concat([df, df2], axis=1)

    if index == "range":
        df = df.reset_index()

    # Fill signal interruptions
    if fill_interruptions is True:
        df = df.bfill()

    if return_sampling_rate is False:
        return (df)
    else:
        return (df, sampling_rate)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Tue Mar 12 11:28:29 2019 @author: or """ from biosppy import storage from biosppy.signals import eda, ecg import bioread a = bioread.read( '/media/Drobo/Levy_Lab/Projects/PTSD_KPE/physio_data/raw/kpe1387/scan_1/kpe1387.1_scripts_2018-09-10T08_39_24.acq' ) # choose scripts channel b = a.named_channels["GSR100C"].raw_data c = b[28700:154159] out = eda.eda(signal=c, sampling_rate=1000., show=True) k["onsetCor"] = k["onset"] + diff j = a.named_channels["Heart Rate"].raw_data n = j[287:154159] ecgOut = ecg.ecg(signal=n, show=True)
#%% events_file = '/media/Data/PTSD_KPE/condition_files/sub-1223_ses-1.csv' #%% Read event file import numpy as np import pandas as pd from scipy import signal events = pd.read_csv(events_file, sep=r'\s+') from biosppy import storage from biosppy.signals import eda, ecg import bioread a = bioread.read( '/media/Data/PTSD_KPE/physio_data/raw/kpe1223/scan_1/kpe1223.1_scripts_2017-01-30T08_17_08.acq' ) # choose scripts channel b = a.named_channels["GSR100C"].raw_data a_resample = signal.decimate(b, 40) plt.plot(b) plt.plot(a_resample) plt.show() c = a_resample[int(7000 / 40):int(130000 / 40)] out = eda.eda(signal=b, sampling_rate=1000., show=True) len(out['filtered']) plt.plot(out['ts']) d = b[130000:252799]
def test_reading_r35_file():
    """The r35 sample file loads and exposes exactly two channels."""
    acq_file = path.join(DATA_PATH, "misc", "r35_test.acq")
    # The read itself raises if the file cannot be handled.
    channels = bioread.read(acq_file).channels
    assert len(channels) == 2
base_input_dir.format(ses=biac_ses), '{s}_physio_run{r}.acq'.format(s=subid, r=run)) txt_file = os.path.join( base_input_dir.format(ses=biac_ses), '{s}_physio_run{r}.txt'.format(s=subid, r=run)) trim_output = os.path.join( base_input_dir.format(ses=biac_ses), '{s}_physio_run{r}_trim.txt'.format(s=subid, r=run)) fsl_output = os.path.join( base_input_dir.format(ses=biac_ses), '{s}_physio_run{r}_fsl.txt'.format(s=subid, r=run)) if not os.path.exists(input_file): print('Input file not found: {}'.format(input_file)) raise RuntimeError #Write out .txt version data = bioread.read(input_file) print('Writing text file: {}'.format(txt_file)) with open(txt_file, 'w') as fid: txtwriter.write_text(data, fid, [0, 1, 9], None) #Read .txt version in with pandas txt_data = pandas.read_csv(txt_file, sep='\t', engine='python') #Find the row with the beginning of the scan. #(Marked with 32.0 in the code channel) start_row = txt_data.loc[txt_data['Code (Volts)'] == 32.0] #Remove the rows of the dataframe before this row short_data = txt_data.drop(range(start_row.index[0])) #Calculate the length of the acquisition in seconds