def cogtest_manipulation(tbldict, roc_cols):
    """Build the combined cognitive-testing table and its derived columns.

    The test-date table and the test-score table are joined into a single
    'cogtests' entry, the two source entries are removed from the
    dictionary, and per-column rate-of-change columns plus a follow-up time
    column are added through the ``cf`` helper module.

    Parameters
    ----------
    tbldict : dict
        Dictionary of DataFrames. Must contain the keys 'cogtestdates' and
        'cogdata'; both are deleted and replaced by 'cogtests'. The
        dictionary is modified in place.
    roc_cols : list
        Column names for which a rate-of-change column should be added.
        Each new column is named '<col>_sl'.

    Returns
    -------
    tbldict : dict
        The same dictionary object that was passed in, now holding the
        'cogtests' table.
    """
    out_key = 'cogtests'
    subj_col = 'codeb'
    tp_col = 'NP_Tp'

    # join dates and scores on subject code + timepoint
    tbldict[out_key] = pd.merge(tbldict['cogtestdates'],
                                tbldict['cogdata'],
                                on=[subj_col, tp_col])

    # the source tables are no longer needed once merged
    for stale_key in ('cogtestdates', 'cogdata'):
        del tbldict[stale_key]

    # one '<col>_sl' rate-of-change column per requested measure
    for col in roc_cols:
        slope_col = '%s_sl' % col
        tbldict[out_key] = cf.rate_of_change(tbldict[out_key], subj_col,
                                             tp_col, 'NP_Date', col,
                                             slope_col)

    # add column for maximum follow-up time per subject
    tbldict[out_key] = cf.max_per_sub(tbldict[out_key], subj_col,
                                      'NP_YrsRelBL', 'NP_Followup_Time')
    return tbldict
def mri_run(datadir, outdir, rois):
    """Main function to collect MRI volume data

    Parameters
    ----------
    datadir : string
        Full path to root directory of freesurfer processed data.
        Expect file tree to be datadir/subcode/stats/aseg.stats
    outdir : string
        Full path to directory where data will be saved. The aseg stats
        text file name is appended directly to this string, so it must end
        with a path separator.
    rois : list of strings
        List of freesurfer rois of interest. The volumes of these rois are
        carried into aseg_change along with their rates of change.

    Returns
    -------
    aseg_stats : pandas DataFrame
        DataFrame where each row is a scan, and columns are volumes of all
        freesurfer processed regions
    aseg_change : pandas DataFrame
        DataFrame where each row is a scan, and columns are the volumes of
        interest, their rates of change, and icv correction
    """
    # get aseg_stats data from freesurfer processed data
    outfile = '%sFS_aseg_stats.txt' % outdir
    # only the subject list is used; the other two return values are ignored
    subs, _asegout, _output = extractFSasegstats(datadir, outfile)
    aseg_stats = pd.read_csv(outfile, header=0, delim_whitespace=True)

    # add columns for SubjID and MRI_TP
    # NOTE(review): assumes `subs` is ordered like the rows of `outfile` --
    # confirm against extractFSasegstats
    aseg_stats['codea'] = [cf.get_id(sub) for sub in subs]
    aseg_stats['MRI_Tp'] = [cf.get_tp(sub) for sub in subs]
    aseg_stats.drop('Measure:volume', axis=1, inplace=True)

    # aseg_change has to exist before it is merged with the scan dates
    # below; without this assignment the merge raises UnboundLocalError.
    # Keep the merge keys, the ICV column needed for the correction, and
    # the requested rois (skipping repeats so no column is duplicated).
    keep_cols = ['codea', 'MRI_Tp', 'IntraCranialVol']
    for roi in rois:
        if roi not in keep_cols:
            keep_cols.append(roi)
    aseg_change = aseg_stats[keep_cols].copy()

    # get dates of MRI scans that were processed with freesurfer
    mridates = bacs_pet_mri_date_batch(datadir)
    # default inner join: scans without a matching date row are dropped
    aseg_change = pd.merge(aseg_change, mridates, on=['codea', 'MRI_Tp'])

    # map each roi to the name of its icv-corrected column
    rois_icvcorr = {roi: '%s_icvcorr' % roi for roi in rois}
    aseg_change = icvcorr(aseg_change, rois_icvcorr, 'IntraCranialVol')

    # calculate rate of change in years
    # NOTE(review): 'MRI_Scandate' is presumably supplied by `mridates` --
    # verify against bacs_pet_mri_date_batch
    for roi in rois:
        aseg_change = cf.rate_of_change(aseg_change, 'codea', 'MRI_Tp',
                                        'MRI_Scandate', roi, '%s_sl' % roi)

    cf.save_xls_and_pkl(aseg_stats, 'aseg_stats', outdir)
    cf.save_xls_and_pkl(aseg_change, 'aseg_change', outdir)
    return aseg_stats, aseg_change
def pibparams_run(path_pib, pibrename, outdir, pibcutoff):
    """Reads data from the spreadsheet, does some calculations, and returns
    a Pandas dataframe with PIB data.

    Parameters
    ----------
    path_pib : string
        String of full path to *.xls. The workbook must contain sheets
        named 'i' and 'j'.
    pibrename : dict
        Dictionary of name:rename pairs, where the keys are columns in the
        PIB spreadsheet and values are what to rename the keys to. Only the
        columns listed as keys are kept.
    outdir : string
        Full path where final dataframe will be saved
    pibcutoff : float
        PIB cutoff value; rows with PIB_Index >= pibcutoff get PIB_Pos = 1.

    Returns
    -------
    pib_df : pandas dataframe
        Dataframe containing all PIB data
    """
    # The sheet is passed positionally because the keyword was renamed
    # (sheetname -> sheet_name) between pandas releases; the positional
    # form works with either spelling.
    # read in pib data from old sheet
    pib_old = pd.read_excel(path_pib, 'i')
    # read in PIB data from longitudinal timepoints
    pib_long = pd.read_excel(path_pib, 'j')

    # concatenate PIB tables
    pib_df = pd.concat([pib_long, pib_old])
    # list(...) so the selection does not depend on keys() returning a list
    pib_df = pib_df[list(pibrename)]
    pib_df = pib_df.rename(columns=pibrename)

    # make binary PIB value
    pib_df['PIB_Pos'] = pib_df['PIB_Index'].apply(
        lambda x: 1 if x >= pibcutoff else 0)

    # calculate rate of change of PIB_Index in years
    pib_df = cf.rate_of_change(pib_df, 'codea', 'PIB_Tp', 'PIB_Scandate',
                               'PIB_Index', 'PIB_sl')

    # order rows by subject and timepoint before the per-subject step below
    sort_cols = ['codea', 'PIB_Tp']
    if hasattr(pib_df, 'sort_values'):
        pib_df = pib_df.sort_values(by=sort_cols)
    else:
        # older pandas releases only provide DataFrame.sort
        pib_df.sort(columns=sort_cols, inplace=True)

    # calculate age of PIB positivity; start with an all-NaN column
    pib_df['PIB_agepos'] = float('nan')
    by_subject = pib_df.groupby(by='codea')
    # NOTE(review): `f` is not defined inside this function -- presumably a
    # module-level helper that fills PIB_agepos per subject; confirm it
    # exists in this file
    pib_df = by_subject.apply(f)

    cf.save_xls_and_pkl(pib_df, 'pibparams', outdir)
    return pib_df