def main():
    """Scan every subject/session pair and record which ones have a complete
    (NaN-free) lowpass pupil trace, saving the valid IDs to valid_data.csv.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    valid_subj_id = []
    valid_session_n = []
    # NOTE(review): this walks the full subjects x sessions grid; it assumes
    # every combination exists on disk -- confirm read_hdf5 tolerates gaps.
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            # Only the samples table feeds the NaN check; the original also
            # loaded the messages/events tables and never used them.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='bandpass')
            if reward_samples.lowpass_pupil_diameter.isna().sum() == 0:
                valid_subj_id.append(subj_id)
                valid_session_n.append(session_n)
                print('sample is valid!')
            else:
                print('sample is invalid. contains nans.')
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
    valid_data_id = pd.DataFrame({
        'valid_sub': valid_subj_id,
        'valid_session': valid_session_n
    })
    # index=False keeps a spurious unnamed index column out of the CSV.
    valid_data_id.to_csv(os.path.join(processed_data_path, 'valid_data.csv'),
                         index=False)
def main():
    """Segment each subject/session's reward-task samples into trials using
    the message stream, then save the segmented tables with the 'seg' tag.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print(f'segmenting subject {subj_id} session {session_n}')
            # Dropped the original per-session time.sleep(1): it added no
            # correctness and slowed every run by a second per session.
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code)
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code)
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code)
            segmented_reward_samples = sg.segment(reward_samples,
                                                  reward_messages)
            # save_hdf5 is called for its side effect; its return value was
            # bound to an unused variable in the original.
            ep.save_hdf5(segmented_reward_samples, reward_events,
                         reward_messages, subj_id, session_n,
                         intermediate_data_path, reward_code=reward_code,
                         id_str='seg')
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
def main():
    # Z-score the baseline-corrected, lowpass-filtered pupil data for every
    # subject/session, plot the result, and persist it with the 'zscored' tag.
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    trial_end = 2000  # last trial-sample index included in the plots
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('z-scoring baseline corrected & lowpass filtered data for subject {}'.format(subj_id)
                  + ' session {}'.format(session_n))
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            # Load the three 'corr'-stage tables in one pass.
            tables = {}
            for table_name in ('samples', 'messages', 'events'):
                tables[table_name] = ep.read_hdf5(table_name, subj_id,
                                                  session_n,
                                                  processed_data_path,
                                                  reward_code=reward_code,
                                                  id_str='corr')
            reward_samples = z.zscore(tables['samples'])
            plot_subset = reward_samples.loc[
                reward_samples.trial_sample <= trial_end]
            ## TODO: get reasonable y limits for all data within a subject and use that for plotting
            fig, figname = vz.visualize(plot_subset.trial_sample,
                                        plot_subset.z_pupil_diameter,
                                        subj_id, session_n, reward_code,
                                        id_str='zscored')
            vz.save(fig, figname)
            hdf = ep.save_hdf5(reward_samples, tables['events'],
                               tables['messages'], subj_id, session_n,
                               processed_data_path, reward_code=reward_code,
                               id_str='zscored')
            print('z-scored data saved')
    end_time = time.time()
    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Locate pupil-diameter extrema per trial epoch for every
    subject/session and save the per-epoch extrema figures.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='zscored')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='zscored')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='zscored')
            # Skip sessions whose z-scored trace still contains NaNs. The
            # original spelled this as `(np.isnan(...).sum() == 0) != 1`,
            # a double negative comparing a bool to an int.
            if reward_samples.z_pupil_diameter.isna().any():
                print('This session has no data.')
                continue
            peaks_df = amp.locate_peaks(reward_samples, subj_id, session_n,
                                        reward_code, save=True)
            figures = []
            for trial_epoch in peaks_df.trial_epoch.unique():
                epoch_samples = peaks_df.loc[peaks_df.trial_epoch ==
                                             trial_epoch]
                fig_name, fig = amp.plot_extrema(epoch_samples, subj_id,
                                                 session_n, reward_code,
                                                 id_str=str(trial_epoch))
                figures.append(fig)
            super_fig_name = ('tepr' + '_sub-' + str(subj_id) + '_sess-' +
                              str(session_n) + '_cond-' + str(reward_code) +
                              '_trial')
            amp.save_extrema(super_fig_name, figures)
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
def main():
    """Plot the baseline-corrected task-evoked pupil response for every
    subject/session and save each figure.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            print(f'plotting baseline-corrected task-evoked response for '
                  f'subject {subj_id} session {session_n} '
                  f'condition {reward_code}')
            # Only the samples table feeds the plot; the original also read
            # the messages/events tables and never used them.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            # Removed the original rename of 'sample' -> 'trial_samples':
            # the renamed column was never referenced (the plot below uses
            # the pre-existing 'trial_sample' column), so it was dead code
            # and its target name looked like a typo for 'trial_sample'.
            fig, fig_name = vz.visualize(reward_samples.trial_sample,
                                         reward_samples.bc_pupil_diameter,
                                         subj_id=subj_id,
                                         session_n=session_n,
                                         reward_code=reward_code,
                                         id_str='corr')
            vz.save(fig, fig_name)
    end_time = time.time()
    print('time elapsed to plot time series: ', end_time - start_time)
def main():
    # Sanity-check that the intermediate HDF5 tables can be read back for
    # every subject and session combination; the sleeps pace the console
    # output so it can be followed live.
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('checking preprocessing for subject {}'.format(subj_id)
                  + ' session {}'.format(session_n))
            time.sleep(1)
            subj_data_file, subj_data_file_raw, reward_code = \
                ep.find_data_files(subj_id=subj_id, session_n=session_n,
                                   reward_task=1, lum_task=0,
                                   raw_data_path=raw_data_path)
            # Attempt each table in turn, echoing its name on success.
            for table_name in ('samples', 'messages', 'events'):
                ep.read_hdf5(table_name, subj_id, session_n,
                             intermediate_data_path,
                             reward_code=reward_code)
                print(table_name)
            time.sleep(1)
def main():
    """Compute and save per-session peak and mean pupil statistics for every
    subject/session with complete z-scored data.
    """
    # NOTE(review): path_config() is unpacked into five values here but four
    # everywhere else in this file -- confirm which arity is current.
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path, _) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='zscored')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='zscored')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='zscored')
            # Skip sessions whose z-scored trace contains NaNs. The original
            # spelled this as `(np.isnan(...).sum() == 0) != 1`, a double
            # negative comparing a bool to an int.
            if reward_samples.z_pupil_diameter.isna().any():
                print('This session has no data.')
                continue
            # Both helpers persist their results (save=True); their returned
            # frames were bound to unused variables in the original.
            amp.locate_peaks(reward_samples, subj_id, session_n, reward_code,
                             save=True)
            amp.find_mean(reward_samples, subj_id, session_n, reward_code,
                          save=True)
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
def main():
    """Compute per-session summary statistics for the sessions listed in
    valid_data.csv (the statistics themselves are still TODO).
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    valid_data_id = pd.read_csv(
        os.path.join(processed_data_path, 'valid_data.csv'))
    unique_subjects = valid_data_id.valid_sub.values
    unique_sessions = valid_data_id.valid_session.values
    start_time = time.time()
    # valid_sub / valid_session are paired per CSV row, so iterate them in
    # lockstep. The original nested loops took their cartesian product,
    # revisiting combinations that were never validated together.
    for subj_id, session_n in zip(unique_subjects, unique_sessions):
        print(f'calculating summary stats for subject {subj_id}'
              f' session {session_n}')
        _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                               session_n=session_n,
                                               reward_task=1, lum_task=0,
                                               raw_data_path=raw_data_path)
        reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                      processed_data_path,
                                      reward_code=reward_code,
                                      id_str='bandpass')
        reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                       processed_data_path,
                                       reward_code=reward_code,
                                       id_str='bandpass')
        reward_events = ep.read_hdf5('events', subj_id, session_n,
                                     processed_data_path,
                                     reward_code=reward_code,
                                     id_str='bandpass')
        # for the stim onset to trial end interval ...
        # find peak amplitude
        # find peak latency
        # find fwhm
        # find area under the curve
        # calc simple average
        # calc median
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
    # Removed a stale copy-paste tail that re-built valid_data.csv from
    # names never defined in this function (valid_subj_id /
    # valid_session_n); it raised NameError at the end of every run.
def main():
    """Raster-plot the bandpass-stage pupil samples for every
    subject/session, with blink periods indicated on the plot.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    n_trial_samples = 30  # samples shown per trial row in the raster plot
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print(f'plotting subject {subj_id} session {session_n}')
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='bandpass')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='bandpass')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='bandpass')
            # (Removed a large commented-out alternative that read the 'seg'
            # intermediate data instead of the 'bandpass' processed data.)
            reward_samples = vz.indicate_blinks(reward_samples, reward_events,
                                                subj_id, session_n,
                                                reward_code)
            fig, figname = vz.raster_plot(reward_samples, subj_id, session_n,
                                          reward_code, n_trial_samples,
                                          id_str='raw')
            vz.save(fig, figname)
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
def main():
    # Apply high- and low-pass filters to the cleaned pupil data, save a
    # figure for each frequency band, then persist the filtered tables with
    # the 'bandpass' tag.
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print('bandpass filtering data for subject {}'.format(subj_id)
                  + ' session {}'.format(session_n))
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='clean')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='clean')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='clean')
            reward_samples = bp.high_bandpass_filter(reward_samples)
            reward_samples = bp.low_bandpass_filter(reward_samples)
            # One figure per band, saved in lowpass-then-highpass order.
            band_columns = (
                ('lowpass', reward_samples.lowpass_pupil_diameter),
                ('highpass', reward_samples.highpass_pupil_diameter),
            )
            for band_tag, band_series in band_columns:
                band_fig, band_figname = vz.visualize(
                    reward_samples.trial_sample, band_series, subj_id,
                    session_n, reward_code, id_str=band_tag)
                vz.save(band_fig, band_figname)
            hdf = ep.save_hdf5(reward_samples, reward_events,
                               reward_messages, subj_id, session_n,
                               processed_data_path, reward_code=reward_code,
                               id_str='bandpass')
    end_time = time.time()
    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
# Ad-hoc check: load the intermediate HDF5 tables for one hard-coded
# subject/session (subject 789, session 3). The prints act as progress
# markers between the three reads.
subj_id, session_n = 789, 3
(raw_data_path, intermediate_data_path, processed_data_path,
 figure_path) = cf.path_config()
subj_data_file, subj_data_file_raw, reward_code = ep.find_data_files(
    subj_id=subj_id, session_n=session_n, reward_task=1, lum_task=0,
    raw_data_path=raw_data_path)
# presumably read_hdf5 raises on a missing/corrupt table, which is the
# failure this script is meant to surface -- TODO confirm.
reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                              intermediate_data_path,
                              reward_code=reward_code)
print('samples')
reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                               intermediate_data_path,
                               reward_code=reward_code)
print('messages')
reward_events = ep.read_hdf5('events', subj_id, session_n,
                             intermediate_data_path,
                             reward_code=reward_code)
print('events')
def main():
    """Filter every session's 'corr'-stage pupil data and report the global
    min/max of the low- and high-pass filtered diameters across all sessions
    (useful for choosing shared plot axis limits).
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    lp_min = []
    lp_max = []
    hp_min = []
    hp_max = []
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print(f'bandpass filtering data for subject {subj_id}'
                  f' session {session_n}')
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            # Only the samples table is filtered; the original also loaded
            # the messages/events tables and discarded them.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_samples = bp.high_bandpass_filter(reward_samples)
            reward_samples = bp.low_bandpass_filter(reward_samples)
            # nan-aware extrema: filtered traces may still contain NaNs.
            lp_min.append(np.nanmin(reward_samples.lowpass_pupil_diameter))
            lp_max.append(np.nanmax(reward_samples.lowpass_pupil_diameter))
            hp_min.append(np.nanmin(reward_samples.highpass_pupil_diameter))
            hp_max.append(np.nanmax(reward_samples.highpass_pupil_diameter))
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)
    print('min lowpass values: ', np.nanmin(lp_min))
    print('max lowpass values: ', np.nanmax(lp_max))
    print('min highpass values: ', np.nanmin(hp_min))
    print('max highpass values: ', np.nanmax(hp_max))
def main():
    """Clean each segmented session: deblink, remove outliers, interpolate,
    then save the result with the 'clean' tag.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()
    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)
    start_time = time.time()
    for subj_id in unique_subjects:
        for session_n in unique_sessions:
            print(f'processing subject {subj_id} session {session_n}')
            # Dropped the original per-session time.sleep(1): it added no
            # correctness and slowed every run by a second per session.
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1, lum_task=0,
                                                   raw_data_path=raw_data_path)
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code,
                                          id_str='seg')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code,
                                           id_str='seg')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code,
                                         id_str='seg')
            # Cleaning pipeline: deblink -> outlier removal -> interpolation.
            deblinked_reward_samples = dd.deblink(reward_samples,
                                                  reward_events)
            print('deblinking complete')
            outlier_removed_reward_samples = dd.outlier_removal(
                deblinked_reward_samples)
            print('outliers removed')
            interpolated_reward_samples = dd.interpolate(
                outlier_removed_reward_samples)
            print('data interpolated')
            # save_hdf5 is called for its side effect; its return value was
            # bound to an unused variable in the original.
            ep.save_hdf5(interpolated_reward_samples, reward_events,
                         reward_messages, subj_id, session_n,
                         processed_data_path, reward_code=reward_code,
                         id_str='clean')
            print('clean data saved')
    end_time = time.time()
    print('time elapsed: ', end_time - start_time)