def main():
    """Scan all subjects/sessions and record which bandpass-filtered sessions are NaN-free.

    Writes the valid (subject, session) pairs to ``valid_data.csv`` in the
    processed-data directory.
    """
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()

    kept_subjects = []
    kept_sessions = []
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            # Load the bandpass-filtered tables; messages/events are loaded to
            # mirror the full read step even though only samples are checked.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='bandpass')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='bandpass')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='bandpass')
            nan_count = reward_samples.lowpass_pupil_diameter.isna().sum()
            if nan_count == 0:
                kept_subjects.append(subj_id)
                kept_sessions.append(session_n)
                print('sample is valid!')
            else:
                print('sample is invalid. contains nans.')

    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)

    valid_data_id = pd.DataFrame({'valid_sub': kept_subjects,
                                  'valid_session': kept_sessions})
    valid_data_id.to_csv(os.path.join(processed_data_path, 'valid_data.csv'))
def main():
    """Convert raw EDF recordings into cleaned, trial-aligned HDF5 files per subject/session."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('processing subject {}'.format(subj_id) +
                  ', session {}'.format(session_n))
            subj_data_file, subj_data_file_raw, reward_code = ep.find_data_files(
                subj_id=subj_id, session_n=session_n, reward_task=1,
                lum_task=0, raw_data_path=raw_data_path)
            print(subj_data_file, reward_code)
            # Raw EDF -> samples/events/messages dataframes.
            samples, events, messages = ep.read_edf(subj_data_file_raw)
            print(samples.head())
            # Successive refinement passes over the three tables.
            samples, events, messages = ep.clean_df(samples, events, messages,
                                                    reward_task=1)
            print(samples.head(), events.head(), messages.head())
            samples, events, messages = ep.extract_experimental_data(
                samples, events, messages)
            samples, events, messages = ep.define_relative_time(
                samples, events, messages)
            hdf = ep.save_hdf5(samples, events, messages, subj_id, session_n,
                               intermediate_data_path, reward_code=reward_code)
def main():
    """Merge model learning signals with per-trial pupil peak/mean summaries into one CSV per session.

    For each subject and (reward_code, session) pair, reads the simulated
    learning-signal CSV plus the per-trial pupil amplitude ("peaks") and
    summary ("means") CSVs — located by glob because the session number in
    those filenames is not known here — concatenates them column-wise, drops
    duplicate columns and 'trial_epoch', and writes the merged per-trial CSV.
    """
    (raw_data_path, _, processed_data_path, figure_path,
     simulated_data_path) = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subject in unique_subjects:
        for reward_code, session in zip(unique_reward_codes, unique_sessions):
            processed_fn = ('tepr' + '_sub-' + str(subject) + '_sess-' +
                            str(session) + '_cond-' + str(reward_code) +
                            '_trial.csv')
            learning_signals_fn = 'sub-{}_cond-{}_learning_signals.csv'.format(
                subject, reward_code)
            # Session number embedded in these names is resolved via glob
            # (read_csv cannot handle the wildcard directly).
            pupil_amplitude_base = 'tepr_sub-{}_sess-*_cond-{}_trial_peaks.csv'.format(
                subject, reward_code)
            pupil_summary_base = 'tepr_sub-{}_sess-*_cond-{}_trial_means.csv'.format(
                subject, reward_code)
            pupil_amplitude_fn = glob.glob(
                os.path.join(processed_data_path, pupil_amplitude_base))
            pupil_summary_fn = glob.glob(
                os.path.join(processed_data_path, pupil_summary_base))
            # Guard BOTH globs: the original only checked the peaks file, so a
            # missing means file raised IndexError below.
            if not pupil_amplitude_fn or not pupil_summary_fn:
                print('No data for this session.')
                continue
            learning_signals_df = pd.read_csv(
                os.path.join(simulated_data_path, learning_signals_fn))
            pupil_amplitude_df = pd.read_csv(pupil_amplitude_fn[0])
            pupil_summary_df = pd.read_csv(pupil_summary_fn[0])
            try:
                trial_df = pd.concat([learning_signals_df,
                                      pupil_amplitude_df,
                                      pupil_summary_df], axis=1)
                trial_df = trial_df.loc[:, ~trial_df.columns.duplicated()]
                trial_df.drop(columns=['trial_epoch'], inplace=True)
                trial_df.to_csv(os.path.join(processed_data_path,
                                             processed_fn),
                                index=False)
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit still propagate; concat/drop failures are
                # reported and the session is skipped.
                print('error in concat.')
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Cut intermediate eye-tracking samples into trial segments and save them as 'seg' HDF5."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('segmenting subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            time.sleep(1)
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            # Load the three intermediate tables for this session.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code)
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code)
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code)
            segmented_reward_samples = sg.segment(reward_samples,
                                                  reward_messages)
            hdf = ep.save_hdf5(segmented_reward_samples, reward_events,
                               reward_messages, subj_id, session_n,
                               intermediate_data_path,
                               reward_code=reward_code, id_str='seg')
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Z-score the baseline-corrected, lowpass-filtered pupil data, plot it, and save it."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    trial_end = 2000  # last trial sample included in the diagnostic plot
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('z-scoring baseline corrected & lowpass filtered data for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='corr')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='corr')
            reward_samples = z.zscore(reward_samples)
            plotting_reward_samples = reward_samples.loc[
                reward_samples.trial_sample <= trial_end]
            # TODO: get reasonable y limits for all data within a subject and
            # use that for plotting
            fig, figname = vz.visualize(plotting_reward_samples.trial_sample,
                                        plotting_reward_samples.z_pupil_diameter,
                                        subj_id, session_n, reward_code,
                                        id_str='zscored')
            vz.save(fig, figname)
            hdf = ep.save_hdf5(reward_samples, reward_events, reward_messages,
                               subj_id, session_n, processed_data_path,
                               reward_code=reward_code, id_str='zscored')
            print('z-scored data saved')
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Locate pupil-response peaks in z-scored sessions and save per-epoch extrema plots."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='zscored')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='zscored')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='zscored')
            # Skip sessions whose z-scored trace contains NaNs. Rewritten from
            # the original `(np.isnan(...).sum() == 0) != 1`, which compared a
            # bool to the integer 1; behavior is unchanged.
            if np.isnan(reward_samples.z_pupil_diameter).sum() != 0:
                print('This session has no data.')
                continue
            peaks_df = amp.locate_peaks(reward_samples, subj_id, session_n,
                                        reward_code, save=True)
            # One extrema figure per trial epoch, collected into a super-figure.
            figures = []
            for trial_epoch in peaks_df.trial_epoch.unique():
                epoch_samples = peaks_df.loc[peaks_df.trial_epoch == trial_epoch]
                fig_name, fig = amp.plot_extrema(epoch_samples, subj_id,
                                                 session_n, reward_code,
                                                 id_str=str(trial_epoch))
                figures.append(fig)
            super_fig_name = ('tepr' + '_sub-' + str(subj_id) + '_sess-' +
                              str(session_n) + '_cond-' + str(reward_code) +
                              '_trial')
            amp.save_extrema(super_fig_name, figures)
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Plot the baseline-corrected task-evoked pupil response for each subject/session."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            print('plotting baseline-corrected task-evoked response for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n) +
                  ' condition {}'.format(reward_code))
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='corr')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='corr')
            # NOTE(review): this renames 'sample' -> 'trial_samples', but the
            # plotting call below reads 'trial_sample' (singular), so the
            # renamed column is never used here — confirm the intended target.
            reward_samples.rename(columns={'sample': 'trial_samples'},
                                  inplace=True)
            fig, fig_name = vz.visualize(reward_samples.trial_sample,
                                         reward_samples.bc_pupil_diameter,
                                         subj_id=subj_id,
                                         session_n=session_n,
                                         reward_code=reward_code,
                                         id_str='corr')
            vz.save(fig, fig_name)
    elapsed_time = time.time() - start_time
    print('time elapsed to plot time series: ', elapsed_time)
def main():
    """Sanity-check that the intermediate HDF5 tables load for every subject/session."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('checking preprocessing for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            time.sleep(1)
            subj_data_file, subj_data_file_raw, reward_code = ep.find_data_files(
                subj_id=subj_id, session_n=session_n, reward_task=1,
                lum_task=0, raw_data_path=raw_data_path)
            # Each read raises if the corresponding table is missing/corrupt;
            # the prints mark which table loaded successfully.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code)
            print('samples')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code)
            print('messages')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code)
            print('events')
            time.sleep(1)
def main():
    """Compute and save per-trial pupil peak and mean summaries from z-scored sessions."""
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path, _) = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='zscored')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='zscored')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='zscored')
            # Skip sessions whose z-scored trace contains NaNs. Rewritten from
            # the original `(np.isnan(...).sum() == 0) != 1`, which compared a
            # bool to the integer 1; behavior is unchanged.
            if np.isnan(reward_samples.z_pupil_diameter).sum() != 0:
                print('This session has no data.')
                continue
            peak_df = amp.locate_peaks(reward_samples, subj_id, session_n,
                                       reward_code, save=True)
            mean_df = amp.find_mean(reward_samples, subj_id, session_n,
                                    reward_code, save=True)
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Load the pooled learning-signals CSV and print a quick sanity check of its contents."""
    # BUG FIX: raw_data_path was never unpacked from path_config(), so the
    # extract_subjects_sessions() call below raised NameError. The first
    # element of the config tuple is the raw-data path (as unpacked by the
    # sibling scripts) — TODO confirm against cf.path_config().
    (raw_data_path, _, _, figure_path, simulated_data_path) = cf.path_config()
    learning_signals_fn = 'learning_signals.csv'
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    learning_signals_df = pd.read_csv(
        os.path.join(simulated_data_path, learning_signals_fn))
    print(learning_signals_df.head())
    # BUG FIX: the original `unique() + '\n'` is element-wise ndarray
    # concatenation and raises TypeError for non-string subject ids; print
    # the two arrays on separate lines instead.
    print(learning_signals_df.subj_id.unique())
    print(learning_signals_df.reward_code.unique())
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Raster-plot bandpass-filtered pupil data with blink annotations for each session."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    n_trial_samples = 30  # number of trials shown per raster figure
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('plotting subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='bandpass')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='bandpass')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='bandpass')
            # Mark blink intervals on the samples before rastering.
            reward_samples = vz.indicate_blinks(reward_samples, reward_events,
                                                subj_id, session_n,
                                                reward_code)
            fig, figname = vz.raster_plot(reward_samples, subj_id, session_n,
                                          reward_code, n_trial_samples,
                                          id_str='raw')
            vz.save(fig, figname)
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Bandpass-filter cleaned pupil data, plot both bands, and save as 'bandpass' HDF5."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('bandpass filtering data for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='clean')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='clean')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='clean')
            # Apply both filters; each adds its own diameter column.
            reward_samples = bp.high_bandpass_filter(reward_samples)
            reward_samples = bp.low_bandpass_filter(reward_samples)
            # One diagnostic figure per band.
            lp_fig, lp_figname = vz.visualize(
                reward_samples.trial_sample,
                reward_samples.lowpass_pupil_diameter,
                subj_id, session_n, reward_code, id_str='lowpass')
            vz.save(lp_fig, lp_figname)
            hp_fig, hp_figname = vz.visualize(
                reward_samples.trial_sample,
                reward_samples.highpass_pupil_diameter,
                subj_id, session_n, reward_code, id_str='highpass')
            vz.save(hp_fig, hp_figname)
            hdf = ep.save_hdf5(reward_samples, reward_events, reward_messages,
                               subj_id, session_n, processed_data_path,
                               reward_code=reward_code, id_str='bandpass')
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Split the pooled learning-signals CSV into per-subject/condition files and decode condition codes.

    Steps:
      1. Split the pooled dataframe by (subj_id, reward_code).
      2. Drop the first and last trial of each split and verify lengths.
      3. Save one CSV per split.
      4. Rename the saved files, replacing the condition *code* in each
         filename with the corresponding reward *value*.
    """
    (raw_data_path, _, _, figure_path, simulated_data_path) = cf.path_config()
    learning_signals_fn = 'learning_signals.csv'
    base_fn = 'sub-*_cond-*_learning_signals.csv'
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    learning_signals_df = pd.read_csv(
        os.path.join(simulated_data_path, learning_signals_fn))
    n_trials = 400
    n_subjects = learning_signals_df.subj_id.nunique()
    # First and last trial of each split are discarded below.
    expected_len = n_trials - 2
    # One entry per (condition, subject) pair, conditions ascending.
    unique_reward_codes = np.repeat(
        learning_signals_df.reward_code.sort_values(ascending=True).unique(),
        n_subjects)
    unique_reward_values = np.repeat(
        [6510, 6520, 6530, 7510, 7520, 7530, 8510, 8520, 8530], n_subjects)

    def split(df, group):
        """Return a list of sub-dataframes, one per unique value of *group*."""
        gb = df.groupby(group)
        return [gb.get_group(x) for x in gb.groups]

    def remove_first_last_trials(df_list, expected_len=expected_len):
        """Drop the first/last row of each dataframe and check the resulting lengths."""
        sliced_dfs = [df.iloc[1:-1] for df in df_list]
        df_lens = {len(df) for df in sliced_dfs}
        # Explicit set comparison: the original `np.unique(df_lens) ==
        # expected_len` raised an ambiguous-truth-value ValueError (instead
        # of a clean AssertionError) whenever lengths differed.
        assert df_lens == {expected_len}, 'check length of sliced dfs!'
        return sliced_dfs

    def extract_fns(df_list):
        """Build the per-split output filename from subject id and reward code."""
        return [('sub-' + str(df.subj_id.unique()[0]) + '_cond-' +
                 str(df.reward_code.unique()[0]) + '_learning_signals.csv')
                for df in df_list]

    def save_split_dfs(df_list, fn_list, data_path=simulated_data_path):
        """Write each split dataframe to its CSV (no index column)."""
        # Plain loop instead of a side-effect list comprehension.
        for df, fn in zip(df_list, fn_list):
            df.to_csv(os.path.join(data_path, fn), index=False)
        print('dfs saved')

    def decode_condition(base_fn=base_fn,
                         data_path=simulated_data_path,
                         reward_values=unique_reward_values,
                         reward_codes=unique_reward_codes):
        """Rename saved files, substituting each condition code with its reward value."""

        def get_cond(elem):
            # Single character at a fixed offset from the end; relies on the
            # rigid '..._cond-<c>_learning_signals.csv' filename layout.
            return elem[-22:-21]

        learning_signals_fns = glob.glob(os.path.join(data_path, base_fn))
        learning_signals_fns.sort(key=get_cond)
        # FIX: use the reward_codes parameter — the original closed over the
        # module-level unique_reward_codes, silently ignoring the argument.
        for fn, reward_val, reward_code in zip(learning_signals_fns,
                                               reward_values, reward_codes):
            print(reward_code, reward_val, fn)
        learning_signals_fns_decoded = [
            re.sub('cond-' + str(reward_code), 'cond-' + str(reward_val), fn)
            for fn, reward_val, reward_code in zip(learning_signals_fns,
                                                   reward_values,
                                                   reward_codes)
        ]
        for original_fn, decoded_fn in zip(learning_signals_fns,
                                           learning_signals_fns_decoded):
            os.rename(original_fn, decoded_fn)
        return None

    dfs = split(learning_signals_df, ['subj_id', 'reward_code'])
    sliced_dfs = remove_first_last_trials(dfs)
    parsed_learning_signals_fns = extract_fns(sliced_dfs)
    save_split_dfs(sliced_dfs, parsed_learning_signals_fns)
    decode_condition()
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Report the global min/max of low- and high-pass filtered pupil traces across all sessions."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    # Per-session extrema, reduced to global extrema after the loop.
    lp_min, lp_max = [], []
    hp_min, hp_max = [], []
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('bandpass filtering data for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='corr')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='corr')
            reward_samples = bp.high_bandpass_filter(reward_samples)
            reward_samples = bp.low_bandpass_filter(reward_samples)
            lp_min.append(np.nanmin(reward_samples.lowpass_pupil_diameter))
            lp_max.append(np.nanmax(reward_samples.lowpass_pupil_diameter))
            hp_min.append(np.nanmin(reward_samples.highpass_pupil_diameter))
            hp_max.append(np.nanmax(reward_samples.highpass_pupil_diameter))
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)
    print('min lowpass values: ', np.nanmin(lp_min))
    print('max lowpass values: ', np.nanmax(lp_max))
    print('min highpass values: ', np.nanmin(hp_min))
    print('max highpass values: ', np.nanmax(hp_max))
def main():
    """Deblink, outlier-remove, and interpolate segmented pupil data; save as 'clean' HDF5."""
    raw_data_path, intermediate_data_path, processed_data_path, figure_path = cf.path_config()
    unique_subjects, unique_sessions, unique_reward_codes = md.extract_subjects_sessions(
        raw_data_path, reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('processing subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))
            time.sleep(1)
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code,
                                          id_str='seg')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code,
                                           id_str='seg')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code,
                                         id_str='seg')
            # Three-stage cleaning pipeline: deblink -> outlier removal ->
            # interpolation.
            deblinked_reward_samples = dd.deblink(reward_samples,
                                                  reward_events)
            print('deblinking complete')
            outlier_removed_reward_samples = dd.outlier_removal(
                deblinked_reward_samples)
            print('outliers removed')
            interpolated_reward_samples = dd.interpolate(
                outlier_removed_reward_samples)
            print('data interpolated')
            hdf = ep.save_hdf5(interpolated_reward_samples, reward_events,
                               reward_messages, subj_id, session_n,
                               processed_data_path, reward_code=reward_code,
                               id_str='clean')
            print('clean data saved')
    time_elapsed = time.time() - start_time
    print('time elapsed: ', time_elapsed)