def clean_trials(trialevents):
    """Deblink, resample and baseline-correct every trial in ``trialevents``.

    For each unique ``Trial`` value, the rows whose ``TETTime`` lies between
    the first and last ``TETTime`` of that trial (inclusive) are run through
    ``pupil_utils.deblink`` and ``pupil_utils.resamp_filt_data``.

    The baseline is the mean ``DiameterPupilLRFilt`` over the final 250 ms of
    the 'Ready' samples. It is replaced by NaN when the mean of ``BlinksLR``
    over the same window exceeds 0.5. ``Dilation`` is the filtered diameter
    minus that baseline.

    Only 'Record' samples are kept, and their index is rebuilt as a
    ``DatetimeIndex`` from the int64 offsets relative to the first kept
    sample.

    Returns:
        The per-trial frames concatenated with index levels named
        'Trial' and 'Timestamp'.
    """
    per_trial = {}
    for trial_id in trialevents.Trial.unique():
        # The first and last TETTime logged for this trial bound its window.
        trial_times = trialevents.loc[trialevents.Trial == trial_id, 'TETTime']
        starttime, stoptime = trial_times.iloc[[0, -1]]
        in_window = ((trialevents.TETTime >= starttime)
                     & (trialevents.TETTime <= stoptime))

        deblinked = pupil_utils.deblink(trialevents.loc[in_window])
        resampled = pupil_utils.resamp_filt_data(
            deblinked,
            filt_type='low',
            string_cols=['Load', 'Trial', 'Condition'])

        ready_rows = resampled.Condition == 'Ready'
        ready_diameter = resampled.loc[ready_rows, 'DiameterPupilLRFilt']
        ready_blinks = resampled.loc[ready_rows, 'BlinksLR']
        baseline = ready_diameter.last('250ms').mean()
        # A baseline window dominated by blinks is treated as unusable.
        if ready_blinks.last('250ms').mean() > .5:
            baseline = np.nan

        resampled['Baseline'] = baseline
        resampled['Dilation'] = (resampled['DiameterPupilLRFilt']
                                 - resampled['Baseline'])

        recorded = resampled[resampled.Condition == 'Record']
        # Re-base the index so every trial starts at the same origin.
        elapsed = recorded.index - recorded.index[0]
        recorded.index = pd.DatetimeIndex(elapsed.astype(np.int64))
        per_trial[trial_id] = recorded

    return pd.concat(per_trial, names=['Trial', 'Timestamp'])
def clean_trials(trialevents):
    """Deblink, resample and baseline-correct the word-presentation trials.

    For each unique ``Trial`` value, only rows whose ``CurrentObject`` is
    'Ready' or contains 'PlayWord' are kept. They are deblinked with
    ``pupil_utils.deblink`` (thresholds 4.0 / 1.5) and resampled with
    ``pupil_utils.resamp_filt_data``.

    The baseline is the mean ``DiameterPupilLRFilt`` over the final 1000 ms
    of the 'Ready' samples, and ``Dilation`` is the filtered diameter minus
    that baseline. Only samples whose ``CurrentObject`` starts with
    'PlayWord' are returned, with the index rebuilt as a ``DatetimeIndex``
    from the int64 offsets relative to the first kept sample.

    Returns:
        A single frame with the per-trial results stacked. The 'Trial' index
        level is dropped and 'Timestamp' becomes an ordinary column.
    """
    resampled_dict = {}
    for trial in trialevents.Trial.unique():
        rawtrial = trialevents.loc[trialevents.Trial == trial]
        keep = ((rawtrial.CurrentObject == 'Ready')
                | rawtrial.CurrentObject.str.contains('PlayWord'))
        rawtrial = rawtrial.loc[keep]
        cleantrial = pupil_utils.deblink(rawtrial,
                                         pupilthresh_hi=4.,
                                         pupilthresh_lo=1.5)
        # assign() returns a new frame with 'Trial' as strings. The previous
        # `.loc[:, 'Trial'] = ...` wrote into the existing column, which is a
        # deprecated incompatible-dtype upcast in recent pandas and could
        # write into whatever frame deblink() handed back.
        cleantrial = cleantrial.assign(Trial=cleantrial.Trial.astype('str'))
        string_cols = ['Trial', 'CurrentObject']
        trial_resamp = pupil_utils.resamp_filt_data(cleantrial,
                                                    filt_type='low',
                                                    string_cols=string_cols)
        ready_diameter = trial_resamp.loc[
            trial_resamp.CurrentObject == "Ready", "DiameterPupilLRFilt"]
        baseline = ready_diameter.last("1000ms").mean()
        trial_resamp['Baseline'] = baseline
        trial_resamp['Dilation'] = (trial_resamp['DiameterPupilLRFilt']
                                    - trial_resamp['Baseline'])
        trial_resamp = trial_resamp[
            trial_resamp.CurrentObject.str.match("PlayWord")]
        # Re-base the index so every trial starts at the same origin.
        trial_resamp.index = pd.DatetimeIndex(
            (trial_resamp.index - trial_resamp.index[0]).astype(np.int64))
        resampled_dict[trial] = trial_resamp
    dfresamp = pd.concat(resampled_dict, names=['Trial', 'Timestamp'])
    # 'Trial' already exists as a column, so drop the duplicate index level.
    dfresamp = dfresamp.reset_index(level='Trial', drop=True).reset_index()
    return dfresamp
def proc_subject(filelist):
    """Process each raw pupil file in ``filelist`` for the oddball task.

    For every file the data are read, deblinked and resampled via
    ``pupil_utils``, and each sample is labelled 'Standard' when
    ``CRESP == 5`` and 'Target' otherwise. The results are then handed to
    helpers defined elsewhere in this module:

    1. ``pupil_utils.plot_qc`` on the resampled data.
    2. ``ts_glm`` / ``save_glm_results`` for the GLM results.
    3. ``plot_pstc`` / ``save_pstc`` for the peristimulus timecourse of
       both conditions.
    4. Session-level data (including ``BlinkPct``) written as CSV to the
       path returned by ``pupil_utils.get_outfile(fname, '_SessionData.csv')``.

    Args:
        filelist: Iterable of paths to tab-separated ``.gazedata``/``.csv``
            files or ``.xlsx`` files.

    Raises:
        IOError: If a file has any other extension.
    """
    # Epoch window and sampling rate passed to proc_all_trials
    # (presumably seconds and Hz -- confirm against proc_all_trials).
    tpre = 0.5
    tpost = 2.5
    samp_rate = 30.
    for fname in filelist:
        print('Processing {}'.format(fname))
        ext = os.path.splitext(fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(fname, parse_dates=False)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)
        oddball_sess = get_oddball_session(fname)

        df = pupil_utils.deblink(df)
        dfresamp = pupil_utils.resamp_filt_data(df)
        dfresamp['Condition'] = np.where(dfresamp.CRESP == 5,
                                         'Standard', 'Target')
        pupil_utils.plot_qc(dfresamp, fname)

        sessdf = get_sessdf(dfresamp)
        sessdf['BlinkPct'] = get_blink_pct(dfresamp, fname)
        dfresamp['zDiameterPupilLRFilt'] = pupil_utils.zscore(
            dfresamp['DiameterPupilLRFilt'])
        targdf, standdf = proc_all_trials(sessdf,
                                          dfresamp['zDiameterPupilLRFilt'],
                                          tpre, tpost, samp_rate)
        targdf_long = reshape_df(targdf)
        standdf_long = reshape_df(standdf)

        glm_results = ts_glm(
            dfresamp.zDiameterPupilLRFilt,
            sessdf.loc[sessdf.Condition == 'Target', 'Timestamp'],
            sessdf.loc[sessdf.Condition == 'Standard', 'Timestamp'],
            dfresamp.BlinksLR)
        # Tag the GLM output with subject and session identifiers.
        glm_results['Subject'] = subid
        glm_results['Session'] = timepoint
        glm_results['OddballSession'] = oddball_sess
        save_glm_results(glm_results, fname)

        # DataFrame.append was removed in pandas 2.0; concat with
        # ignore_index=True matches append(...).reset_index(drop=True).
        allconddf = pd.concat([standdf_long, targdf_long], ignore_index=True)
        # Tag the timecourse data with subject and session identifiers.
        allconddf['Subject'] = subid
        allconddf['Session'] = timepoint
        allconddf['OddballSession'] = oddball_sess
        plot_pstc(allconddf, fname)
        save_pstc(allconddf, fname)

        # Tag the session-level data with subject and session identifiers.
        sessdf['Subject'] = subid
        sessdf['Session'] = timepoint
        sessdf['OddballSession'] = oddball_sess
        sessout = pupil_utils.get_outfile(fname, '_SessionData.csv')
        sessdf.to_csv(sessout, index=False)
def proc_subject(filelist):
    """Process each raw pupil file in ``filelist`` for the recall phase.

    For every file this keeps the samples whose ``CurrentObject`` contains
    "Recall", deblinks them, runs ``clean_trials`` and truncates the result
    to the first 30 seconds. It then:

    1. Averages the data in 1-second bins and writes them to
       ``*_ProcessedPupil.csv``.
    2. Calls ``plot_trials`` on the 1-second data.
    3. Averages the same 30 seconds in 10-second bins and writes them to
       ``*_ProcessedPupil_Tertiles.csv``.

    Args:
        filelist: Iterable of paths to tab-separated ``.gazedata``/``.csv``
            files or ``.xlsx`` files.

    Raises:
        IOError: If a file has any other extension.
    """
    for fname in filelist:
        print('Processing {}'.format(fname))
        ext = os.path.splitext(fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(fname)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)

        df = df[df.CurrentObject.str.contains("Recall", na=False)]
        df = pupil_utils.deblink(df)
        dfresamp = clean_trials(df)
        # Keep only the first 30 s of the trial.
        dfresamp = dfresamp[
            dfresamp.index <= dfresamp.index[0] + pd.offsets.Second(30)]

        #### Create data for 1 second blocks
        # Lowercase 's' matches the '10s' alias used below; uppercase 'S' is
        # deprecated in recent pandas.
        dfresamp1s = dfresamp.resample('1s', closed='right',
                                       label='right').mean()
        dfresamp1s.index = dfresamp1s.index.round('s')
        dfresamp1s = dfresamp1s.dropna(how='all')
        pupildf = dfresamp1s.reset_index().rename(columns={
            'index': 'Timestamp',
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'})
        pupilcols = ['Subject', 'Timestamp', 'Dilation', 'Baseline',
                     'Diameter', 'BlinkPct']
        pupildf = pupildf[pupilcols].sort_values(by='Timestamp')
        # Keep only first 30s of trial. Use positional access: after
        # sort_values the earliest row is first by position, whereas
        # Timestamp[0] would look up index *label* 0.
        first_timestamp = pupildf.Timestamp.iloc[0]
        pupildf = pupildf.loc[
            pupildf.Timestamp <= first_timestamp + pd.offsets.Second(30)]
        # Tag with subject and session identifiers.
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupil_outname = pupil_utils.get_proc_outfile(fname,
                                                     '_ProcessedPupil.csv')
        pupil_outname = pupil_outname.replace("HVLT_Recall-Recognition",
                                              "HVLT_Recall")
        pupil_outname = pupil_outname.replace("-Delay", "-Recall")
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
        plot_trials(pupildf, fname)

        #### Create data for 10 second blocks
        dfresamp10s = dfresamp.resample('10s', closed='right',
                                        label='right').mean()
        pupilcols = ['Subject', 'Timestamp', 'Dilation', 'Baseline',
                     'DiameterPupilLRFilt', 'BlinksLR']
        pupildf10s = dfresamp10s.reset_index()[pupilcols].sort_values(
            by='Timestamp')
        pupildf10s = pupildf10s[pupilcols].rename(columns={
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'})
        # Tag with subject and session identifiers.
        pupildf10s['Subject'] = subid
        pupildf10s['Session'] = timepoint
        pupildf10s['Timestamp'] = pd.to_datetime(
            pupildf10s.Timestamp).dt.strftime('%H:%M:%S')
        pupil10s_outname = pupil_utils.get_proc_outfile(
            fname, '_ProcessedPupil_Tertiles.csv')
        pupil10s_outname = pupil10s_outname.replace("HVLT_Recall-Recognition",
                                                    "HVLT_Recall")
        pupil10s_outname = pupil10s_outname.replace("-Delay", "-Recall")
        # Previously a bare string expression, so nothing was ever reported.
        print('Writing tertile data to {0}'.format(pupil10s_outname))
        pupildf10s.to_csv(pupil10s_outname, index=False)
def proc_subject(filelist):
    """Process each raw pupil file in ``filelist`` into a per-trial CSV.

    For every file this reads the data, keeps only the rows after the last
    "Recall" sample, deblinks them, and passes the result through
    ``clean_trials`` and ``proc_all_trials`` (both defined elsewhere). The
    output is tagged with subject and session, two columns are renamed,
    the columns are reordered, and the frame is written to
    ``*_ProcessedPupil.csv`` (with "-Delay" replaced by "-Recognition" in
    the file name).

    Args:
        filelist: Iterable of paths to tab-separated ``.gazedata``/``.csv``
            files or ``.xlsx`` files.

    Raises:
        IOError: If a file has any other extension.
    """
    for fname in filelist:
        print('Processing {}'.format(fname))
        if (os.path.splitext(fname)[-1] == ".gazedata") | (os.path.splitext(fname)[-1] == ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif os.path.splitext(fname)[-1] == ".xlsx":
            df = pd.read_excel(fname)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)
        # Keep only samples after last sample of Recall
        # NOTE(review): index[-1] raises IndexError when the file has no
        # "Recall" rows, and the slice assumes index labels equal positions
        # (default RangeIndex) -- confirm.
        df = df[df[df.CurrentObject == "Recall"].index[-1] + 1:]
        df = pupil_utils.deblink(df)
        dfresamp = pupil_utils.resamp_filt_data(df, filt_type='low', string_cols=['CurrentObject'])
        # Resampling forward-fills CurrentObject when it is missing. This
        # results in values of "Response" at the beginning of trials. Replace
        # these by backfilling from the first occurrence of "Fixation" in
        # every trial.
        for i in dfresamp.TrialId.unique():
            # idxmax() on a boolean Series returns the label of the first True.
            trialstartidx = (dfresamp.TrialId == i).idxmax()
            fixstartidx = (
                dfresamp.loc[dfresamp.TrialId == i, "CurrentObject"] == "Fixation").idxmax()
            dfresamp.loc[trialstartidx:fixstartidx, "CurrentObject"] = "Fixation"
        # NOTE(review): this rebinds dfresamp from the raw deblinked `df`, so
        # the resampled and back-filled frame built above is discarded. Either
        # the block above is dead code or clean_trials should receive
        # `dfresamp` -- confirm against clean_trials before changing.
        dfresamp = clean_trials(df)
        pupildf = proc_all_trials(dfresamp)
        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupildf = pupildf.rename(columns={
            'DiameterPupilLRFilt': 'Diameter',
            'BlinksLR': 'BlinkPct'
        })
        # Reorder columns
        cols = [
            'Subject', 'Session', 'TrialId', 'Baseline', 'Diameter',
            'Dilation', 'BlinkPct', 'Duration', 'Condition'
        ]
        pupildf = pupildf[cols]
        pupil_outname = pupil_utils.get_proc_outfile(fname, '_ProcessedPupil.csv')
        pupil_outname = pupil_outname.replace("-Delay", "-Recognition")
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
def proc_subject(filelist):
    """Process each raw pupil file in ``filelist`` into condition averages.

    For every file this reads the data, keeps only the rows after the last
    "Recall" sample, deblinks them, and runs ``clean_trials`` and
    ``proc_all_trials`` (both defined elsewhere). Trials whose ``BlinkPct``
    is not below 0.50 are dropped and the remainder is passed to
    ``plot_trials``.

    The kept trials are averaged per ('Condition', 'Timestamp') group, with
    the group size stored in ``ntrials``. The result is written to
    ``*_ProcessedPupil.csv`` after the file name is rewritten to the
    "Recognition" form.

    Args:
        filelist: Iterable of paths to tab-separated ``.gazedata``/``.csv``
            files or ``.xlsx`` files.

    Raises:
        IOError: If a file has any other extension.
    """
    group_keys = ['Condition', 'Timestamp']
    averaged_cols = [
        'Baseline', 'DiameterPupilLRFilt', 'Dilation', 'BlinksLR', 'Duration'
    ]
    output_cols = [
        'Subject', 'Session', 'Baseline', 'Timestamp', 'Diameter',
        'Dilation', 'BlinkPct', 'Duration', 'Condition', 'ntrials'
    ]
    renames = {'DiameterPupilLRFilt': 'Diameter', 'BlinksLR': 'BlinkPct'}

    for fname in filelist:
        print('Processing {}'.format(fname))
        extension = os.path.splitext(fname)[-1]
        if extension in (".gazedata", ".csv"):
            df = pd.read_csv(fname, sep="\t")
        elif extension == ".xlsx":
            df = pd.read_excel(fname)
        else:
            raise IOError('Could not open {}'.format(fname))
        subid = pupil_utils.get_subid(df['Subject'], fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], fname)

        # Keep only samples after last sample of Recall
        last_recall = df[df.CurrentObject == "Recall"].index[-1]
        df = pupil_utils.deblink(df[last_recall + 1:])
        alltrialsdf = proc_all_trials(clean_trials(df))

        # Remove trials with >50% blinks
        alltrialsdf = alltrialsdf[alltrialsdf.BlinkPct < .50]
        plot_trials(alltrialsdf, fname)

        # Average within condition at each timestamp and count the trials
        # contributing to every average.
        by_cond_time = alltrialsdf.groupby(group_keys)
        pupildf = by_cond_time[averaged_cols].mean()
        pupildf['ntrials'] = by_cond_time.size()
        pupildf = pupildf.reset_index()

        pupildf['Subject'] = subid
        pupildf['Session'] = timepoint
        pupildf = pupildf.rename(columns=renames)
        # Reorder columns
        pupildf = pupildf[output_cols]

        pupil_outname = pupil_utils.get_proc_outfile(fname,
                                                     '_ProcessedPupil.csv')
        pupil_outname = pupil_outname.replace("HVLT_Recall-Recognition",
                                              "HVLT_Recognition")
        pupil_outname = pupil_outname.replace("-Delay", "-Recognition")
        pupildf.to_csv(pupil_outname, index=False)
        print('Writing processed data to {0}'.format(pupil_outname))
def clean_trials(df, trialevents):
    """Deblink, resample and baseline-correct each trial listed in ``trialevents``.

    ``trialevents`` must hold exactly four rows per ``Trial``, whose
    ``TETTime`` values are, in order: baseline start, baseline stop,
    response start and response stop. For every trial the samples of ``df``
    between baseline start and response stop (inclusive) are labelled with a
    ``Phase`` of 'Baseline' or 'Response', then run through
    ``pupil_utils.deblink`` and ``pupil_utils.resamp_filt_data``. Samples
    between baseline stop and response start get no label.

    The baseline is the mean ``DiameterPupilLRFilt`` over the first 1000 ms
    of the resampled trial, and ``Dilation`` is the filtered diameter minus
    that baseline. Only 'Response' samples are returned, tagged with the
    trial's ``Condition``.

    Args:
        df: Sample-level data with a ``TETTime`` column.
        trialevents: Event table with ``Trial``, ``TETTime`` and
            ``Condition`` columns.

    Returns:
        The per-trial frames concatenated with index levels named
        'Trial' and 'Timestamp'.

    Raises:
        ValueError: If a trial does not have exactly four event rows.
    """
    resampled_dict = {}
    for trialnum in trialevents.Trial.unique():
        events = trialevents.loc[trialevents.Trial == trialnum]
        if len(events) != 4:
            # Same exception type as the bare unpacking raised, but with a
            # message that identifies the offending trial.
            raise ValueError(
                'Trial {} has {} event rows; expected 4 (baseline start/stop, '
                'response start/stop)'.format(trialnum, len(events)))
        basestart, basestop, respstart, respstop = events['TETTime']
        condition = events['Condition'].iat[0]

        # Copy so the Phase assignments below act on an independent frame
        # rather than a slice of df (avoids SettingWithCopyWarning).
        rawtrial = df.loc[(df.TETTime >= basestart)
                          & (df.TETTime <= respstop)].copy()
        rawtrial.loc[(rawtrial.TETTime >= basestart)
                     & (rawtrial.TETTime <= basestop), 'Phase'] = 'Baseline'
        rawtrial.loc[(rawtrial.TETTime >= respstart)
                     & (rawtrial.TETTime <= respstop), 'Phase'] = 'Response'

        cleantrial = pupil_utils.deblink(rawtrial)
        trial_resamp = pupil_utils.resamp_filt_data(
            cleantrial, filt_type='low',
            string_cols=['CurrentObject', 'Phase'])
        baseline = trial_resamp['DiameterPupilLRFilt'].first('1000ms').mean()
        trial_resamp['Baseline'] = baseline
        trial_resamp['Dilation'] = (trial_resamp['DiameterPupilLRFilt']
                                    - trial_resamp['Baseline'])

        # Copy before adding a column to the filtered frame.
        trial_resamp = trial_resamp[trial_resamp.Phase == 'Response'].copy()
        trial_resamp['Condition'] = condition
        resampled_dict[trialnum] = trial_resamp
    dfresamp = pd.concat(resampled_dict, names=['Trial', 'Timestamp'],
                         sort=True)
    return dfresamp
def proc_subject(filelist):
    """Process each raw pupil file in ``filelist`` for the C/I/N task.

    For every file the pupil data are read, deblinked and band-pass
    resampled via ``pupil_utils``, and the matching E-Prime file (located
    with ``get_eprime_fname``) is loaded. The results are then handed to
    helpers defined elsewhere in this module:

    1. ``pupil_utils.plot_qc`` on the resampled data.
    2. ``ts_glm`` / ``save_glm_results`` for the GLM results over the
       'C', 'I' and 'N' conditions.
    3. ``plot_pstc`` / ``save_pstc`` for the peristimulus timecourse of all
       three conditions, restricted to ``Timepoint < 3.0``.
    4. Session-level data (including ``BlinkPct``) written as CSV to the
       path returned by
       ``pupil_utils.get_proc_outfile(pupil_fname, '_SessionData.csv')``.

    Args:
        filelist: Iterable of paths to tab-separated ``.gazedata``/``.csv``
            files or ``.xlsx`` files.

    Raises:
        IOError: If a file has any other extension.
    """
    # Epoch window and sampling rate passed to proc_all_trials
    # (presumably seconds and Hz -- confirm against proc_all_trials).
    tpre = 0.250
    tpost = 2.5
    samp_rate = 30.
    for pupil_fname in filelist:
        print('Processing {}'.format(pupil_fname))
        ext = os.path.splitext(pupil_fname)[-1]
        if ext in (".gazedata", ".csv"):
            df = pd.read_csv(pupil_fname, sep="\t")
        elif ext == ".xlsx":
            df = pd.read_excel(pupil_fname, parse_dates=False)
        else:
            raise IOError('Could not open {}'.format(pupil_fname))
        subid = pupil_utils.get_subid(df['Subject'], pupil_fname)
        timepoint = pupil_utils.get_timepoint(df['Session'], pupil_fname)

        df = pupil_utils.deblink(df)
        # Assign the result back explicitly: an in-place replace through
        # attribute access is a chained operation that is not guaranteed to
        # update `df` under pandas copy-on-write.
        df['CurrentObject'] = df['CurrentObject'].replace('StimulusRecord',
                                                          'Stimulus')
        dfresamp = pupil_utils.resamp_filt_data(
            df, filt_type='band', string_cols=['TrialId', 'CurrentObject'])
        # The resampled frame carries TrialId twice (suffixes _x/_y); keep
        # the _y copy under the plain name.
        dfresamp = dfresamp.drop(columns='TrialId_x').rename(
            columns={'TrialId_y': 'TrialId'})

        eprime_fname = get_eprime_fname(pupil_fname)
        eprime = pd.read_csv(eprime_fname, sep='\t', encoding='utf-16',
                             skiprows=0)
        # If the first row is not the expected header, re-read the file
        # skipping one line.
        if not np.array_equal(eprime.columns[:3],
                              ['ExperimentName', 'Subject', 'Session']):
            eprime = pd.read_csv(eprime_fname, sep='\t', encoding='utf-16',
                                 skiprows=1)
        eprime = eprime.rename(columns={"Congruency": "Condition"})

        pupil_utils.plot_qc(dfresamp, pupil_fname)
        sessdf = get_sessdf(dfresamp, eprime)
        sessdf['BlinkPct'] = get_blink_pct(dfresamp, pupil_fname)
        dfresamp['zDiameterPupilLRFilt'] = pupil_utils.zscore(
            dfresamp['DiameterPupilLRFilt'])
        condf, incondf, neutraldf = proc_all_trials(
            sessdf, dfresamp['zDiameterPupilLRFilt'], tpre, tpost, samp_rate)
        condf_long = reshape_df(condf)
        incondf_long = reshape_df(incondf)
        neutraldf_long = reshape_df(neutraldf)

        glm_results = ts_glm(
            dfresamp.zDiameterPupilLRFilt,
            sessdf.loc[sessdf.Condition == 'C', 'Timestamp'],
            sessdf.loc[sessdf.Condition == 'I', 'Timestamp'],
            sessdf.loc[sessdf.Condition == 'N', 'Timestamp'],
            dfresamp.BlinksLR)
        # Tag the GLM output with subject and session identifiers.
        glm_results['Subject'] = subid
        glm_results['Session'] = timepoint
        save_glm_results(glm_results, pupil_fname)

        # DataFrame.append was removed in pandas 2.0; one concat with
        # ignore_index=True matches the two chained
        # append(...).reset_index(drop=True) calls.
        allconddf = pd.concat([condf_long, incondf_long, neutraldf_long],
                              ignore_index=True)
        # Tag the timecourse data with subject and session identifiers.
        allconddf['Subject'] = subid
        allconddf['Session'] = timepoint
        allconddf = allconddf[allconddf.Timepoint < 3.0]
        plot_pstc(allconddf, pupil_fname)
        save_pstc(allconddf, pupil_fname)

        sessdf['Subject'] = subid
        sessdf['Session'] = timepoint
        sessout = pupil_utils.get_proc_outfile(pupil_fname,
                                               '_SessionData.csv')
        sessdf.to_csv(sessout, index=False)