def proc_subject(filelist):
    """Process raw pupil files from the oddball task, one file at a time.

    For each file this function:

    1. Reads the raw data (tab-delimited ``.gazedata``/``.csv`` or ``.xlsx``).
    2. Deblinks, resamples/filters, labels each sample as ``Standard`` or
       ``Target`` and writes a QC plot.
    3. Builds session-level data (``get_sessdf``) and adds ``BlinkPct``.
    4. Extracts target and standard trial timecourses from the z-scored
       pupil diameter, then plots and saves them.
    5. Runs ``ts_glm`` on the z-scored diameter and saves the results.
    6. Writes session data to the ``_SessionData.csv`` output file.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    # Passed straight to proc_all_trials; presumably the pre/post-stimulus
    # window in seconds and the sampling rate in Hz -- TODO confirm.
    tpre = 0.5
    tpost = 2.5
    samp_rate = 30.
    for fname in filelist:
        print('Processing {}'.format(fname))
        ext = os.path.splitext(fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(fname, parse_dates=False)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)
        oddball_sess = get_oddball_session(fname)
        df = pupil_utils.deblink(df)
        dfresamp = pupil_utils.resamp_filt_data(df)
        # A CRESP value of 5 marks a standard; anything else is a target.
        dfresamp['Condition'] = np.where(
            dfresamp.CRESP == 5, 'Standard', 'Target')
        pupil_utils.plot_qc(dfresamp, fname)
        sessdf = get_sessdf(dfresamp)
        sessdf['BlinkPct'] = get_blink_pct(dfresamp, fname)
        dfresamp['zDiameterPupilLRFilt'] = pupil_utils.zscore(
            dfresamp['DiameterPupilLRFilt'])
        targdf, standdf = proc_all_trials(
            sessdf, dfresamp['zDiameterPupilLRFilt'], tpre, tpost, samp_rate)
        targdf_long = reshape_df(targdf)
        standdf_long = reshape_df(standdf)
        glm_results = ts_glm(
            dfresamp.zDiameterPupilLRFilt,
            sessdf.loc[sessdf.Condition == 'Target', 'Timestamp'],
            sessdf.loc[sessdf.Condition == 'Standard', 'Timestamp'],
            dfresamp.BlinksLR)
        # Tag GLM results with subject and session identifiers
        glm_results['Subject'] = subid
        glm_results['Session'] = timepoint
        glm_results['OddballSession'] = oddball_sess
        save_glm_results(glm_results, fname)
        # DataFrame.append was removed in pandas 2.0; pd.concat with
        # ignore_index=True gives the same stacked frame with a fresh index.
        allconddf = pd.concat([standdf_long, targdf_long], ignore_index=True)
        # Tag the timecourse data with subject and session identifiers
        allconddf['Subject'] = subid
        allconddf['Session'] = timepoint
        allconddf['OddballSession'] = oddball_sess
        plot_pstc(allconddf, fname)
        save_pstc(allconddf, fname)
        # Tag the session data with subject and session identifiers
        sessdf['Subject'] = subid
        sessdf['Session'] = timepoint
        sessdf['OddballSession'] = oddball_sess
        sessout = pupil_utils.get_outfile(fname, '_SessionData.csv')
        sessdf.to_csv(sessout, index=False)
def proc_subject(filelist):
    """Process raw pupil files, keeping only samples from "Recall" objects.

    For each file this function:

    1. Reads the raw data (tab-delimited ``.gazedata``/``.csv`` or ``.xlsx``).
    2. Keeps rows whose ``CurrentObject`` contains "Recall", deblinks them
       and cleans them with ``clean_trials``.
    3. Restricts the data to the first 30 seconds.
    4. Averages into 1-second bins, writes ``_ProcessedPupil.csv`` and
       plots the result with ``plot_trials``.
    5. Averages into 10-second bins and writes
       ``_ProcessedPupil_Tertiles.csv``.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    for fname in filelist:
        print('Processing {}'.format(fname))
        ext = os.path.splitext(fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(fname)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)
        df = df[df.CurrentObject.str.contains("Recall", na=False)]
        df = pupil_utils.deblink(df)
        dfresamp = clean_trials(df)
        # Keep only the first 30 s after the first retained sample
        dfresamp = dfresamp[
            dfresamp.index <= dfresamp.index[0] + pd.offsets.Second(30)]

        #### Create data for 1 second blocks
        # Lowercase 's' is the non-deprecated seconds alias and matches the
        # '10s' resample further down.
        dfresamp1s = dfresamp.resample(
            '1s', closed='right', label='right').mean()
        dfresamp1s.index = dfresamp1s.index.round('s')
        dfresamp1s = dfresamp1s.dropna(how='all')
        pupildf = dfresamp1s.reset_index().rename(columns={
            'index': 'Timestamp',
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'})
        pupilcols = ['Subject', 'Timestamp', 'Dilation', 'Baseline',
                     'Diameter', 'BlinkPct']
        pupildf = pupildf[pupilcols].sort_values(by='Timestamp')
        # Keep only first 30s of trial
        pupildf = pupildf.loc[
            pupildf.Timestamp <= pupildf.Timestamp[0] + pd.offsets.Second(30)]
        # Set subject ID and session
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupil_outname = pupil_utils.get_proc_outfile(
            fname, '_ProcessedPupil.csv')
        pupil_outname = pupil_outname.replace(
            "HVLT_Recall-Recognition", "HVLT_Recall")
        pupil_outname = pupil_outname.replace("-Delay", "-Recall")
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
        plot_trials(pupildf, fname)

        #### Create data for 10 second blocks
        dfresamp10s = dfresamp.resample(
            '10s', closed='right', label='right').mean()
        pupilcols = ['Subject', 'Timestamp', 'Dilation', 'Baseline',
                     'DiameterPupilLRFilt', 'BlinksLR']
        pupildf10s = dfresamp10s.reset_index()[pupilcols].sort_values(
            by='Timestamp')
        pupildf10s = pupildf10s[pupilcols].rename(columns={
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'})
        # Set subject ID and session
        pupildf10s['Subject'] = subid
        pupildf10s['Session'] = timepoint
        pupildf10s['Timestamp'] = pd.to_datetime(
            pupildf10s.Timestamp).dt.strftime('%H:%M:%S')
        pupil10s_outname = pupil_utils.get_proc_outfile(
            fname, '_ProcessedPupil_Tertiles.csv')
        pupil10s_outname = pupil10s_outname.replace(
            "HVLT_Recall-Recognition", "HVLT_Recall")
        pupil10s_outname = pupil10s_outname.replace("-Delay", "-Recall")
        # The original built this message but never printed it.
        print('Writing tertile data to {0}'.format(pupil10s_outname))
        pupildf10s.to_csv(pupil10s_outname, index=False)
def proc_subject(filelist):
    """Write one row of summarized pupil data per trial for each raw file.

    For each file: read it, drop everything up to and including the last
    "Recall" sample, deblink, clean the trials, summarize them with
    ``proc_all_trials`` and write the ``_ProcessedPupil.csv`` output
    (with "-Delay" replaced by "-Recognition" in the output name).

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    column_renames = {
        'DiameterPupilLRFilt': 'Diameter',
        'BlinksLR': 'BlinkPct',
    }
    # Output column order
    output_cols = [
        'Subject', 'Session', 'TrialId', 'Baseline', 'Diameter',
        'Dilation', 'BlinkPct', 'Duration', 'Condition',
    ]
    for raw_path in filelist:
        print('Processing {}'.format(raw_path))
        suffix = os.path.splitext(raw_path)[-1]
        if suffix == ".xlsx":
            df = pd.read_excel(raw_path)
        elif suffix in (".gazedata", ".csv"):
            df = pd.read_csv(raw_path, sep="\t")
        else:
            raise IOError('Could not open {}'.format(raw_path))
        subid = pupil_utils.get_subid(df['Subject'], raw_path)
        timepoint = pupil_utils.get_timepoint(df['Session'], raw_path)

        # Keep only samples after the last sample of Recall
        last_recall = df[df.CurrentObject == "Recall"].index[-1]
        df = df[last_recall + 1:]
        df = pupil_utils.deblink(df)

        dfresamp = pupil_utils.resamp_filt_data(
            df, filt_type='low', string_cols=['CurrentObject'])
        # Resampling forward-fills CurrentObject where it is missing, which
        # leaves "Response" values at the start of trials. Replace these by
        # back-filling from the first "Fixation" sample of every trial.
        for trial_id in dfresamp.TrialId.unique():
            in_trial = dfresamp.TrialId == trial_id
            trial_start = in_trial.idxmax()
            is_fixation = dfresamp.loc[in_trial, "CurrentObject"] == "Fixation"
            fixation_start = is_fixation.idxmax()
            dfresamp.loc[
                trial_start:fixation_start, "CurrentObject"] = "Fixation"

        # NOTE(review): the frame built above is replaced here by
        # clean_trials(df), so the resampling/back-fill result is not used
        # downstream. Confirm whether clean_trials was meant to receive it.
        dfresamp = clean_trials(df)

        pupildf = proc_all_trials(dfresamp)
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupildf = pupildf.rename(columns=column_renames)
        pupildf = pupildf[output_cols]

        pupil_outname = pupil_utils.get_proc_outfile(
            raw_path, '_ProcessedPupil.csv')
        pupil_outname = pupil_outname.replace("-Delay", "-Recognition")
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
def proc_subject(filelist):
    """Write condition-by-timestamp pupil averages for each raw file.

    For each file: read it, drop everything up to and including the last
    "Recall" sample, deblink, clean and process the trials, discard trials
    whose ``BlinkPct`` is 0.50 or higher, plot the remaining trials, then
    average within ``Condition`` and ``Timestamp`` and write the
    ``_ProcessedPupil.csv`` output.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    group_keys = ['Condition', 'Timestamp']
    measures = [
        'Baseline', 'DiameterPupilLRFilt', 'Dilation', 'BlinksLR', 'Duration',
    ]
    column_renames = {
        'DiameterPupilLRFilt': 'Diameter',
        'BlinksLR': 'BlinkPct',
    }
    # Output column order
    output_cols = [
        'Subject', 'Session', 'Baseline', 'Timestamp', 'Diameter',
        'Dilation', 'BlinkPct', 'Duration', 'Condition', 'ntrials',
    ]
    for raw_path in filelist:
        print('Processing {}'.format(raw_path))
        suffix = os.path.splitext(raw_path)[-1]
        if suffix == ".xlsx":
            df = pd.read_excel(raw_path)
        elif suffix in (".gazedata", ".csv"):
            df = pd.read_csv(raw_path, sep="\t")
        else:
            raise IOError('Could not open {}'.format(raw_path))
        subid = pupil_utils.get_subid(df['Subject'], raw_path)
        timepoint = pupil_utils.get_timepoint(df['Session'], raw_path)

        # Keep only samples after the last sample of Recall
        last_recall = df[df.CurrentObject == "Recall"].index[-1]
        df = df[last_recall + 1:]
        df = pupil_utils.deblink(df)
        dfresamp = clean_trials(df)
        alltrialsdf = proc_all_trials(dfresamp)

        # Remove trials with >50% blinks
        low_blink = alltrialsdf.BlinkPct < .50
        alltrialsdf = alltrialsdf[low_blink]
        plot_trials(alltrialsdf, raw_path)

        # Average the measures and count the contributing rows per cell
        by_cell = alltrialsdf.groupby(group_keys)
        pupildf = by_cell[measures].mean()
        pupildf['ntrials'] = by_cell.size()
        pupildf = pupildf.reset_index()

        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupildf = pupildf.rename(columns=column_renames)
        pupildf = pupildf[output_cols]

        pupil_outname = pupil_utils.get_proc_outfile(
            raw_path, '_ProcessedPupil.csv')
        pupil_outname = pupil_outname.replace(
            "HVLT_Recall-Recognition", "HVLT_Recognition")
        pupil_outname = pupil_outname.replace("-Delay", "-Recognition")
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
def save_total_blink_pct(dfresamp, infile):
    """Save the session-wide mean of ``BlinksLR`` as a small JSON file.

    The JSON object holds ``TotalBlinkPct`` (mean of ``dfresamp.BlinksLR``
    as a float) together with the ``Subject``, ``Session`` and
    ``OddballSession`` identifiers derived from ``dfresamp`` and ``infile``.

    Parameters
    ----------
    dfresamp : pandas.DataFrame
        Data with ``BlinksLR``, ``Subject`` and ``Session`` columns.
    infile : str
        Path of the raw input file; used to derive the output name
        (suffix ``_BlinkPct.json``) and the identifiers.
    """
    outfile = pupil_utils.get_outfile(infile, '_BlinkPct.json')
    summary = {
        'TotalBlinkPct': float(dfresamp.BlinksLR.mean()),
        'Subject': pupil_utils.get_subid(dfresamp['Subject'], infile),
        'Session': pupil_utils.get_timepoint(dfresamp['Session'], infile),
        'OddballSession': get_oddball_session(infile),
    }
    # Serialize before opening so a failure never leaves a truncated file
    payload = json.dumps(summary)
    with open(outfile, 'w') as handle:
        handle.write(payload)
def proc_subject(filelist):
    """Write per-trial pupil data in 1-second and 15-second bins.

    For each file this function:

    1. Reads the raw data (tab-delimited ``.gazedata``/``.csv`` or ``.xlsx``).
    2. Extracts trial events and cleans the trials.
    3. Re-expresses ``Timestamp`` relative to the first sample of each trial.
    4. Averages each Condition/Trial into 1-second bins, writes
       ``_ProcessedPupil.csv`` and plots it with ``plot_trials``.
    5. Averages each Condition/Trial into 15-second bins and writes
       ``_ProcessedPupil_Quartiles.csv``.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    for fname in filelist:
        print('Processing {}'.format(fname))
        ext = os.path.splitext(fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(fname, parse_dates=False)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)
        trialevents = get_trial_events(df)
        dfresamp = clean_trials(df, trialevents)
        dfresamp = dfresamp.reset_index(drop=False).set_index(
            ['Condition', 'Trial'])
        # Make time relative to the first sample of each trial, then turn
        # the elapsed time back into datetimes so resample() can bin it.
        dfresamp['Timestamp'] = dfresamp.groupby(
            level='Trial')['Timestamp'].transform(lambda x: x - x.iat[0])
        dfresamp['Timestamp'] = pd.to_datetime(
            dfresamp.Timestamp.values.astype(np.int64))

        ### Create data resampled to 1 second
        dfresamp1s = dfresamp.groupby(level=['Condition', 'Trial']).apply(
            lambda x: x.resample(
                '1s', on='Timestamp', closed='right', label='right').mean())
        pupilcols = ['Subject', 'Session', 'Trial', 'Condition', 'Timestamp',
                     'Dilation', 'Baseline', 'DiameterPupilLRFilt',
                     'BlinksLR']
        pupildf = dfresamp1s.reset_index()[pupilcols].sort_values(
            by=['Trial', 'Timestamp'])
        pupildf = pupildf[pupilcols].rename(columns={
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'})
        # Set subject ID and session
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupildf['Timestamp'] = pd.to_datetime(
            pupildf.Timestamp).dt.strftime('%H:%M:%S')
        pupil_outname = pupil_utils.get_proc_outfile(
            fname, '_ProcessedPupil.csv')
        print('Writing processed data to {0}'.format(pupil_outname))
        pupildf.to_csv(pupil_outname, index=False)
        plot_trials(pupildf, fname)

        #### Create data for 15 second blocks
        dfresamp15s = dfresamp.groupby(level=['Condition', 'Trial']).apply(
            lambda x: x.resample(
                '15s', on='Timestamp', closed='right', label='right').mean())
        pupilcols = ['Subject', 'Session', 'Trial', 'Condition', 'Timestamp',
                     'Dilation', 'Baseline', 'DiameterPupilLRFilt',
                     'BlinksLR']
        pupildf15s = dfresamp15s.reset_index()[pupilcols].sort_values(
            by=['Trial', 'Timestamp'])
        pupildf15s = pupildf15s[pupilcols].rename(columns={
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'})
        # Set subject ID and session
        pupildf15s['Subject'] = subid
        pupildf15s['Session'] = timepoint
        pupildf15s['Timestamp'] = pd.to_datetime(
            pupildf15s.Timestamp).dt.strftime('%H:%M:%S')
        pupil15s_outname = pupil_utils.get_proc_outfile(
            fname, '_ProcessedPupil_Quartiles.csv')
        # The original built this message but never printed it.
        print('Writing quartile data to {0}'.format(pupil15s_outname))
        pupildf15s.to_csv(pupil15s_outname, index=False)
def proc_subject(filelist):
    """Write 1-second averages of every individual trial for each raw file.

    For each file: read it, extract trial events, clean the trials, average
    each Load/Trial into 1-second bins and write the result to the
    ``_AllTrials.csv`` output. The output path has 'Processed Pupil Data'
    replaced by 'Wang Lab', and its directory is created when missing.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    keep_cols = [
        'Subject', 'Trial', 'Load', 'Timestamp', 'Dilation', 'Baseline',
        'DiameterPupilLRFilt', 'BlinksLR',
    ]
    column_renames = {
        'DiameterPupilLRFilt': 'Diameter',
        'BlinksLR': 'BlinkPct',
    }

    def _mean_per_second(trial):
        # Right-closed, right-labelled 1-second bins over Timestamp
        return trial.resample(
            '1s', on='Timestamp', closed='right', label='right').mean()

    for raw_path in filelist:
        print('Processing {}'.format(raw_path))
        suffix = os.path.splitext(raw_path)[-1]
        if suffix == ".xlsx":
            raw = pd.read_excel(raw_path)
        elif suffix in (".gazedata", ".csv"):
            raw = pd.read_csv(raw_path, sep="\t")
        else:
            raise IOError('Could not open {}'.format(raw_path))

        # NOTE(review): neither value is used below; the calls are kept
        # because the helpers may validate the file -- confirm.
        subid = pupil_utils.get_subid(raw['Subject'], raw_path)
        timepoint = pupil_utils.get_timepoint(raw['Session'], raw_path)

        events = get_trial_events(raw)
        cleaned = clean_trials(events)
        cleaned = cleaned.reset_index(level='Timestamp')
        cleaned = cleaned.set_index(['Load', 'Trial'])
        # (A disabled step here once saved the cleaned 30Hz per-trial data
        # to '_ProcessedPupil30Hz.csv'.)

        # Take average of each second
        by_trial = cleaned.groupby(level=['Load', 'Trial'])
        persec = by_trial.apply(_mean_per_second).reset_index()

        # Select and rename columns of interest
        persec = persec[keep_cols].rename(columns=column_renames)

        # Save out individual trial data for Wang Lab
        out_path = pupil_utils.get_proc_outfile(raw_path, '_AllTrials.csv')
        out_path = out_path.replace('Processed Pupil Data', 'Wang Lab')
        out_dir = os.path.dirname(out_path)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        persec['Timestamp'] = persec.Timestamp.dt.strftime('%H:%M:%S')
        persec.to_csv(out_path, index=False)
def proc_subject(filelist):
    """Write load-by-second pupil averages for each raw file.

    For each file: read it, extract trial events, clean the trials and
    average each Load/Trial into 1-second bins. Bins with more than 50%
    blinks are set to missing, the remaining data are averaged across trials
    within ``Load`` and ``Timestamp``, and the result is written to the
    ``_ProcessedPupil.csv`` output and plotted with ``plot_trials``.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    keep_cols = ['Subject', 'Trial', 'Load', 'Timestamp', 'Dilation',
                 'Baseline', 'DiameterPupilLRFilt', 'BlinksLR']
    column_renames = {'DiameterPupilLRFilt': 'Diameter',
                      'BlinksLR': 'BlinkPct'}
    blink_masked_cols = ['Dilation', 'Baseline', 'Diameter', 'BlinkPct']
    output_cols = ['Subject', 'Session', 'Load', 'Timestamp', 'Baseline',
                   'Diameter', 'Dilation', 'BlinkPct', 'ntrials']
    cell_keys = ['Load', 'Timestamp']

    def _mean_per_second(trial):
        # Right-closed, right-labelled 1-second bins over Timestamp
        return trial.resample(
            '1s', on='Timestamp', closed='right', label='right').mean()

    for raw_path in filelist:
        print('Processing {}'.format(raw_path))
        suffix = os.path.splitext(raw_path)[-1]
        if suffix == ".xlsx":
            raw = pd.read_excel(raw_path)
        elif suffix in (".gazedata", ".csv"):
            raw = pd.read_csv(raw_path, sep="\t")
        else:
            raise IOError('Could not open {}'.format(raw_path))
        subid = pupil_utils.get_subid(raw['Subject'], raw_path)
        timepoint = pupil_utils.get_timepoint(raw['Session'], raw_path)

        events = get_trial_events(raw)
        cleaned = clean_trials(events)
        cleaned = cleaned.reset_index(level='Timestamp')
        cleaned = cleaned.set_index(['Load', 'Trial'])
        # (A disabled step here once saved the cleaned 30Hz per-trial data
        # to '_ProcessedPupil30Hz.csv'.)

        # Take average of each second
        by_trial = cleaned.groupby(level=['Load', 'Trial'])
        persec = by_trial.apply(_mean_per_second).reset_index()

        # Select and rename columns of interest
        persec = persec[keep_cols].rename(columns=column_renames)

        # Set samples with >50% blinks to missing
        too_blinky = persec.BlinkPct > .5
        persec.loc[too_blinky, blink_masked_cols] = np.nan

        # Average across trials within each load and second
        pupildf = persec.groupby(cell_keys).mean()

        # Set subject ID and session
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint

        # Add number of non-missing trials that contributed to each average
        valid = persec.dropna(subset=['Dilation'])
        pupildf['ntrials'] = valid.groupby(cell_keys).size()

        pupildf = pupildf.reset_index()
        pupildf['Timestamp'] = pupildf.Timestamp.dt.strftime('%H:%M:%S')
        pupildf = pupildf[output_cols]

        pupil_outname = pupil_utils.get_proc_outfile(
            raw_path, '_ProcessedPupil.csv')
        print('Writing processed data to {0}'.format(pupil_outname))

        # Save out data and plots
        pupildf.to_csv(pupil_outname, index=False)
        plot_trials(pupildf, raw_path)
def proc_subject(filelist):
    """Write per-word pupil averages within each trial for each raw file.

    For each file: read it, extract trial events, clean the trials and
    average within ``Trial`` and ``CurrentObject``. The word number is taken
    from ``CurrentObject`` by stripping the 'PlayWord' text. The result is
    written to the ``_ProcessedPupil.csv`` output and plotted with
    ``plot_trials``.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    keep_cols = [
        'Subject', 'Session', 'Trial', 'Word', 'Dilation', 'Baseline',
        'DiameterPupilLRFilt', 'BlinksLR',
    ]
    column_renames = {
        'DiameterPupilLRFilt': 'Diameter',
        'BlinksLR': 'BlinkPct',
    }
    for raw_path in filelist:
        print('Processing {}'.format(raw_path))
        suffix = os.path.splitext(raw_path)[-1]
        if suffix == ".xlsx":
            raw = pd.read_excel(raw_path)
        elif suffix in (".gazedata", ".csv"):
            raw = pd.read_csv(raw_path, sep="\t")
        else:
            raise IOError('Could not open {}'.format(raw_path))
        subid = pupil_utils.get_subid(raw['Subject'], raw_path)
        timepoint = pupil_utils.get_timepoint(raw['Session'], raw_path)

        events = get_trial_events(raw)
        cleaned = clean_trials(events)

        # One row per trial and word
        by_word = cleaned.groupby(['Trial', 'CurrentObject'])
        pupildf = by_word.mean().reset_index()
        word_labels = pupildf.CurrentObject.str.replace('PlayWord', '')
        pupildf['Word'] = word_labels.astype('int')

        pupildf = pupildf[keep_cols]
        pupildf = pupildf.sort_values(by=['Trial', 'Word'])

        # Set subject ID and session
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupildf = pupildf.rename(columns=column_renames)

        pupil_outname = pupil_utils.get_proc_outfile(
            raw_path, '_ProcessedPupil.csv')
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
        plot_trials(pupildf, raw_path)
def proc_subject(filelist):
    """Process raw pupil files together with their matching E-Prime files.

    For each file this function:

    1. Reads the raw data (tab-delimited ``.gazedata``/``.csv`` or ``.xlsx``).
    2. Deblinks, relabels 'StimulusRecord' as 'Stimulus', then
       resamples/band-pass filters the data and writes a QC plot.
    3. Reads the matching E-Prime file (UTF-16, tab-delimited) and renames
       its ``Congruency`` column to ``Condition``.
    4. Builds session-level data (``get_sessdf``) and adds ``BlinkPct``.
    5. Extracts timecourses for the three conditions ('C', 'I', 'N') from
       the z-scored pupil diameter, then plots and saves them.
    6. Runs ``ts_glm`` on the z-scored diameter and saves the results.
    7. Writes session data to the ``_SessionData.csv`` output file.

    Parameters
    ----------
    filelist : iterable of str
        Paths of raw pupil data files.

    Raises
    ------
    IOError
        If a file extension is not ``.gazedata``, ``.csv`` or ``.xlsx``.
    """
    # Passed straight to proc_all_trials; presumably the pre/post-stimulus
    # window in seconds and the sampling rate in Hz -- TODO confirm.
    tpre = 0.250
    tpost = 2.5
    samp_rate = 30.
    for pupil_fname in filelist:
        print('Processing {}'.format(pupil_fname))
        ext = os.path.splitext(pupil_fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(pupil_fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(pupil_fname, parse_dates=False)
        else:
            raise IOError('Could not open {}'.format(pupil_fname))
        subid = pupil_utils.get_subid(df['Subject'], pupil_fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], pupil_fname)
        df = pupil_utils.deblink(df)
        # Assign the result back explicitly: a chained in-place replace on
        # df.CurrentObject does not update df under pandas Copy-on-Write.
        df['CurrentObject'] = df['CurrentObject'].replace(
            'StimulusRecord', 'Stimulus')
        dfresamp = pupil_utils.resamp_filt_data(
            df, filt_type='band', string_cols=['TrialId', 'CurrentObject'])
        # Keep the '_y' copy of TrialId from the resampled frame
        dfresamp = dfresamp.drop(columns='TrialId_x').rename(
            columns={'TrialId_y': 'TrialId'})

        eprime_fname = get_eprime_fname(pupil_fname)
        eprime = pd.read_csv(
            eprime_fname, sep='\t', encoding='utf-16', skiprows=0)
        # If the first row is not the expected header, re-read skipping it
        if not np.array_equal(eprime.columns[:3],
                              ['ExperimentName', 'Subject', 'Session']):
            eprime = pd.read_csv(
                eprime_fname, sep='\t', encoding='utf-16', skiprows=1)
        eprime = eprime.rename(columns={"Congruency": "Condition"})

        pupil_utils.plot_qc(dfresamp, pupil_fname)
        sessdf = get_sessdf(dfresamp, eprime)
        sessdf['BlinkPct'] = get_blink_pct(dfresamp, pupil_fname)
        dfresamp['zDiameterPupilLRFilt'] = pupil_utils.zscore(
            dfresamp['DiameterPupilLRFilt'])
        condf, incondf, neutraldf = proc_all_trials(
            sessdf, dfresamp['zDiameterPupilLRFilt'], tpre, tpost, samp_rate)
        condf_long = reshape_df(condf)
        incondf_long = reshape_df(incondf)
        neutraldf_long = reshape_df(neutraldf)
        glm_results = ts_glm(
            dfresamp.zDiameterPupilLRFilt,
            sessdf.loc[sessdf.Condition == 'C', 'Timestamp'],
            sessdf.loc[sessdf.Condition == 'I', 'Timestamp'],
            sessdf.loc[sessdf.Condition == 'N', 'Timestamp'],
            dfresamp.BlinksLR)
        # Tag GLM results with subject and session identifiers
        glm_results['Subject'] = subid
        glm_results['Session'] = timepoint
        save_glm_results(glm_results, pupil_fname)

        # DataFrame.append was removed in pandas 2.0; one pd.concat with
        # ignore_index=True gives the same stacked frame with a fresh index.
        allconddf = pd.concat(
            [condf_long, incondf_long, neutraldf_long], ignore_index=True)
        # Tag the timecourse data with subject and session identifiers
        allconddf['Subject'] = subid
        allconddf['Session'] = timepoint
        # Only keep rows whose Timepoint is below 3.0
        allconddf = allconddf[allconddf.Timepoint < 3.0]
        plot_pstc(allconddf, pupil_fname)
        save_pstc(allconddf, pupil_fname)

        sessdf['Subject'] = subid
        sessdf['Session'] = timepoint
        sessout = pupil_utils.get_proc_outfile(pupil_fname, '_SessionData.csv')
        sessdf.to_csv(sessout, index=False)