def execute_glms(docs, out_dir, contrast_definitions=None,
                 outputs=None, glm_model='ar1', dataset_id=None,
                 do_preproc=True, smoothed=None, n_jobs=None):
    """Function to execute a series of GLMs (one per subject)

    """

    # sanity
    if n_jobs is None:
        n_jobs = len(docs)
    n_jobs = min(n_jobs, multiprocessing.cpu_count() / 4)

    # preprocess the data
    if do_preproc:
        # smoothing kernel (None means no smoothing)
        fwhm = smoothed

        subject_fmri_perms = {}

        def subject_factory():
            for doc in docs:
                subject_data = SubjectData()
                subject_data.subject_id = doc['subject']

                # grab anat filename like a ninja
                subject_data.anat = os.path.join(
                    re.search(
                        ".+?%s\/fMRI\/acquisition1" % subject_data.subject_id,
                        doc['raw_data'][0]).group().replace("fMRI", "t1mri"),
                    "anat_%s_3T_neurospin.img" % subject_data.subject_id)

                # don't want any ugly surprises hereafter
                assert os.path.exists(subject_data.anat)

                # grab subject session ids like a ninja
                subject_data.session_id = sorted(list(set(
                    [re.search("\/acquisition1\/(.+?)\/", x).group(1)
                     for x in doc['raw_data']])))

                # collect list of lists of 3D scans (one list per session)
                perm = []
                subject_data.func = [sorted(
                    [x for x in doc['raw_data'] if s in x])
                    for s in subject_data.session_id]
                _tmp = [x for session_fmri_files in subject_data.func
                        for x in session_fmri_files]
                for fmri_filename in doc['raw_data']:
                    for k in range(len(_tmp)):
                        if fmri_filename == _tmp[k]:
                            perm.append(k)
                            break
                subject_fmri_perms[subject_data.subject_id] = perm

                # set subject output directory (so there'll be no pollution)
                subject_data.output_dir = os.path.join(
                    out_dir, subject_data.subject_id)
                if not os.path.exists(subject_data.output_dir):
                    os.makedirs(subject_data.output_dir)

                # yield input data for this subject
                yield subject_data

        preproc_results = do_subjects_preproc(
            subject_factory(),
            output_dir=out_dir,
            dataset_id=dataset_id,
            fwhm=fwhm,
            n_jobs=n_jobs,
            # do_report=False,
            )

        # sanitize
        assert len(preproc_results) == len(docs)
        for doc in docs:
            for preproc_result in preproc_results:
                if preproc_result['subject_id'] == doc['subject']:
                    # undo the shuffling of session-wise fmri files (due to
                    # sorting in the preproc pipeline), otherwise the
                    # activation maps would be garbage
                    doc['data'] = list(np.array([
                        x for session_fmri_files in preproc_result['func']
                        for x in session_fmri_files])[
                            subject_fmri_perms[doc['subject']]])
                    break

    # execute one GLM per subject
    if do_preproc:
        output_dir = out_dir
    else:
        output_dir = os.path.join(out_dir, "not_repreprocessed")
    joblib.Parallel(n_jobs=max(n_jobs / 4, 1))(joblib.delayed(execute_glm)(
        doc,
        output_dir,
        contrast_definitions,
        outputs,
        glm_model,
        ) for doc in docs)
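# A hedged usage sketch for execute_glms (not from the original code). The
# `docs` records below are assumptions inferred from the function body above:
# each doc carries a subject id and the raw 3D fMRI filenames, with paths
# containing "<subject>/fMRI/acquisition1/<session>/". Paths and ids are
# purely illustrative.
import glob
docs = [
    {'subject': 's12069',  # hypothetical subject id
     'raw_data': sorted(glob.glob(
         "/path/to/s12069/fMRI/acquisition1/*/*.img"))},  # hypothetical path
    ]
execute_glms(docs, "/path/to/glm_output",
             dataset_id="my_dataset",  # hypothetical dataset id
             do_preproc=True, smoothed=5.)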
"""fetch input data""" _subject_data = fetch_fsl_feeds_data(data_dir) subject_data = nipype_preproc_spm_utils.SubjectData() subject_data.subject_id = "sub001" subject_data.func = _subject_data["func"] unzip_nii_gz(os.path.dirname(subject_data.func)) subject_data.anat = _subject_data["anat"] subject_data.output_dir = os.path.join( output_dir, subject_data.subject_id) unzip_nii_gz(os.path.dirname(subject_data.anat)) """preprocess the data""" results = nipype_preproc_spm_utils.do_subjects_preproc( [subject_data], output_dir=output_dir, # fwhm=[5, 5, 5], dataset_id="FSL FEEDS single-subject", dataset_description=DATASET_DESCRIPTION, do_shutdown_reloaders=False, ) """collect preprocessed data""" fmri_files = results[0]['func'] anat_file = results[0]['anat'] """specify contrasts""" contrasts = {} n_columns = len(design_matrix.names) I = np.eye(len(design_matrix.names)) for i in xrange(paradigm.n_conditions): contrasts['%s' % design_matrix.names[2 * i]] = I[2 * i]
    data_dir=DATA_DIR, subject_ids=subject_ids, n_jobs=len(subject_ids))


def subject_factory():
    """producer for subject (input) data"""
    for subject_id, sd in haxby_data.iteritems():
        subject_data = nipype_preproc_spm_utils.SubjectData()
        subject_data.session_id = "haxby2001"
        subject_data.subject_id = subject_id
        unzip_nii_gz(sd.subject_dir)
        subject_data.anat = sd.anat.replace(".gz", "")
        subject_data.func = sd.bold.replace(".gz", "")
        subject_data.output_dir = os.path.join(
            OUTPUT_DIR, subject_data.subject_id)

        yield subject_data

"""do preprocessing proper"""
results = nipype_preproc_spm_utils.do_subjects_preproc(
    subject_factory(),
    output_dir=OUTPUT_DIR,
    dataset_id="HAXBY 2001",
    do_realign=False,
    do_coreg=False,
    do_dartel=DO_DARTEL,
    do_cv_tc=False,
    dataset_description=DATASET_DESCRIPTION,
    )
def preproc_abide_institute(institute_id, abide_data_dir, abide_output_dir,
                            do_dartel=True,
                            do_report=True,
                            n_jobs=-1,
                            ):
    """Preprocesses a given ABIDE institute

    """

    # set institute output dir
    institute_output_dir = os.path.join(abide_output_dir, institute_id)
    if not os.path.exists(institute_output_dir):
        os.makedirs(institute_output_dir)

    # set subject id wildcard for globbing institute subjects
    subject_id_wildcard = "%s_*/%s_*" % (institute_id, institute_id)

    # glob for subject ids
    subject_ids = [os.path.basename(x)
                   for x in glob.glob(os.path.join(abide_data_dir,
                                                   subject_id_wildcard))]

    # sort the ids
    subject_ids.sort()

    ignored_subject_ids = []

    # producer of subject data
    def subject_factory():
        for subject_id in subject_ids:
            subject_data = nipype_preproc_spm_utils.SubjectData()
            subject_data.subject_id = subject_id

            try:
                subject_data.func = glob.glob(
                    os.path.join(
                        abide_data_dir,
                        "%s/%s/scans/rest*/resources/NIfTI/files/rest.nii" % (
                            subject_id, subject_id)))[0]
            except IndexError:
                ignored_because = "no rest data found"
                print "Ignoring subject %s (%s)" % (subject_id,
                                                    ignored_because)
                ignored_subject_ids.append((subject_id, ignored_because))
                continue

            try:
                subject_data.anat = glob.glob(
                    os.path.join(
                        abide_data_dir,
                        "%s/%s/scans/anat/resources/NIfTI/files/mprage.nii" % (
                            subject_id, subject_id)))[0]
            except IndexError:
                if do_dartel:
                    # can't do DARTEL under such conditions
                    continue
                try:
                    subject_data.hires = glob.glob(
                        os.path.join(
                            abide_data_dir,
                            ("%s/%s/scans/hires/resources/NIfTI/"
                             "files/hires.nii") % (subject_id, subject_id)))[0]
                except IndexError:
                    ignored_because = "no anat/hires data found"
                    print "Ignoring subject %s (%s)" % (subject_id,
                                                        ignored_because)
                    ignored_subject_ids.append((subject_id, ignored_because))
                    continue

            subject_data.output_dir = os.path.join(institute_output_dir,
                                                   subject_id)

            yield subject_data

    # do preprocessing proper
    report_filename = os.path.join(institute_output_dir, "_report.html")
    nipype_preproc_spm_utils.do_subjects_preproc(
        subject_factory(),
        dataset_id=institute_id,
        output_dir=institute_output_dir,
        do_report=do_report,
        do_dartel=do_dartel,
        dataset_description="%s" % DATASET_DESCRIPTION.replace(
            "%s", institute_id),
        report_filename=report_filename,
        do_shutdown_reloaders=True,
        )

    for subject_id, ignored_because in ignored_subject_ids:
        print "Ignored %s because %s" % (subject_id, ignored_because)
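# Hedged usage sketch (not from the original script): loop the ABIDE
# preprocessing over a few institutes. The institute ids and paths below are
# illustrative assumptions.
abide_data_dir = "/path/to/ABIDE"
abide_output_dir = "/path/to/ABIDE_preproc"
for institute_id in ["KKI", "NYU", "OHSU"]:  # hypothetical subset of sites
    preproc_abide_institute(institute_id, abide_data_dir, abide_output_dir,
                            do_dartel=True, do_report=True)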
def dataset_preprocessing(dataset_id, data_dir, output_dir, ignore_list=None,
                          dataset_description=None):
    """Main function for preprocessing (and analysis ?)

    Returns
    -------
    list of Bunch objects with fields anat, func, and subject_id
    for each preprocessed subject

    """
    data_dir = os.path.join(data_dir, dataset_id)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    output_dir = os.path.join(output_dir, dataset_id)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset_description = DATASET_DESCRIPTION if \
        dataset_description is None else dataset_description
    ignore_list = [] if ignore_list is None else ignore_list

    # glob for subject ids
    subjects_id = [
        os.path.basename(x)
        for x in glob.glob(os.path.join(data_dir, subject_id_wildcard))]
    subjects_id.sort()
    sessions_id = {}

    # producer of subject data
    def subject_factory():
        for subject_id in subjects_id:
            if subject_id in ignore_list:
                continue

            sessions = get_subject_sessions(os.path.join(data_dir,
                                                         subject_id))
            sessions_id[subject_id] = sessions

            # construct subject data structure
            subject_data = nipype_preproc_spm_utils.SubjectData()
            subject_data.session_id = sessions
            subject_data.subject_id = subject_id
            subject_data.func = []
            assert sessions != []

            # glob for bold data
            has_bad_sessions = False
            for session_id in subject_data.session_id:
                bold_dir = os.path.join(
                    data_dir,
                    "%s/BOLD/%s" % (subject_id, session_id))

                # extract .nii.gz to .nii
                unzip_nii_gz(bold_dir)

                # glob bold data for this session
                func = glob.glob(os.path.join(bold_dir, "bold.nii"))

                # check that this session is OK (has bold data, etc.)
                if not func:
                    has_bad_sessions = True
                    break

                subject_data.func.append(func[0])

            # exclude subject if necessary
            if has_bad_sessions:
                continue

            # glob for anatomical data
            anat_dir = os.path.join(
                data_dir,
                "%s/anatomy" % subject_id)

            # extract .nii.gz to .nii
            unzip_nii_gz(anat_dir)

            # glob anatomical data proper
            subject_data.anat = glob.glob(
                os.path.join(
                    data_dir,
                    "%s/anatomy/highres001_brain.nii" % subject_id))[0]

            # set subject output dir (all calculations for
            # this subject go here)
            subject_data.output_dir = os.path.join(
                output_dir,
                subject_id)

            yield subject_data

    # do preprocessing proper
    report_filename = os.path.join(output_dir, "_report.html")
    for results in nipype_preproc_spm_utils.do_subjects_preproc(
            subject_factory(),
            n_jobs=1,
            output_dir=output_dir,
            do_deleteorient=True,  # some openfmri data have garbage orientation
            do_dartel=DO_DARTEL,
            dataset_id=dataset_id,
            # do_cv_tc=False,
            dataset_description=dataset_description,
            # do_report=False,
            report_filename=report_filename,
            do_shutdown_reloaders=True,  # XXX remove this to chain GLM QA
            ):
        subject_id = results['subject_id']

        # dump results in openfmri layout
        if not isinstance(results['estimated_motion'], list):
            results['estimated_motion'] = [results['estimated_motion']]
        if not isinstance(results['func'], list):
            results['func'] = [results['func']]

        img = nb.load(results['anat'])
        nb.save(img, os.path.join(
            data_dir, subject_id, 'anatomy', 'normalized_highres001.nii.gz'))

        for session_id, motion, func in zip(sessions_id[subject_id],
                                            results['estimated_motion'],
                                            results['func']):
            # estimated motion
            shutil.copyfile(motion, os.path.join(
                data_dir, subject_id, 'BOLD', session_id, 'motion.txt'))

            # preprocessed bold
            img = nb.load(func)
            nb.save(img, os.path.join(
                data_dir, subject_id, 'BOLD', session_id,
                'normalized_bold.nii.gz'))
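# Hedged usage sketch (not from the original script): run the OpenfMRI-style
# preprocessing for one dataset. The dataset id, paths and exclusion are
# assumptions for illustration only.
dataset_preprocessing("ds105",  # hypothetical OpenfMRI dataset id
                      data_dir="/path/to/openfmri",
                      output_dir="/path/to/openfmri_preproc",
                      ignore_list=["sub010"])  # hypothetical exclusion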
import reporting.glm_reporter as glm_reporter

DATA_DIR = "/home/elvis/Downloads/alex_spm/"
OUTPUT_DIR = "spm_multimodal_runs"

# fetch the data
subject_data = nipype_preproc_spm_utils.SubjectData()
subject_data.subject_id = "sub001"
subject_data.session_id = ["Session1", "Session2"]
subject_data.func = [sorted(glob.glob(os.path.join(
    DATA_DIR, "fMRI/%s/fMETHODS-*.img" % s)))
    for s in subject_data.session_id]
subject_data.anat = os.path.join(DATA_DIR, "sMRI/smri.img")
subject_data.output_dir = os.path.join(OUTPUT_DIR, subject_data.subject_id)

"""preprocess the data"""
results = nipype_preproc_spm_utils.do_subjects_preproc(
    [subject_data],
    output_dir=OUTPUT_DIR,
    fwhm=[8, 8, 8],
    dataset_id="SPM MULTIMODAL (see @alex)",
    do_shutdown_reloaders=False,
    )
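# A minimal follow-up sketch (assumption: the `results` list returned above
# has the same layout as in the other examples in this file): collect the
# preprocessed images for later GLM analysis.
fmri_files = results[0]['func']  # one list of preprocessed 3D scans per session
anat_file = results[0]['anat']   # preprocessed structural image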
        # set anat
        subject_data.anat = [
            x.replace(".gz", "") for x in session_anat if subject_id in x]
        assert len(subject_data.anat) == 1
        subject_data.anat = subject_data.anat[0]
        unzip_nii_gz(os.path.dirname(subject_data.anat))

        # set subject output directory
        subject_data.output_dir = os.path.join(
            session_output_dir, subject_data.subject_id)

        yield subject_data

# do preprocessing proper
for session in SESSIONS:
    session_output_dir = os.path.join(OUTPUT_DIR, "session%i" % session)

    # preprocess this session for all subjects
    print ("\r\n\r\n\t\t\tPreprocessing session %i for all subjects..."
           "\r\n\r\n") % session
    nipype_preproc_spm_utils.do_subjects_preproc(
        subject_factory(session_output_dir, session),
        output_dir=session_output_dir,
        do_deleteorient=True,
        do_dartel=DARTEL,
        dataset_id="NYU Test Rest session %i" % session,
        dataset_description=DATASET_DESCRIPTION,
        )

print "Done (NYU Test Rest preprocessing)"
def main(data_dir, output_dir, exclusions=None, dataset_id=None):
    """Main function for preprocessing (and analysis ?)

    Returns
    -------
    list of Bunch objects with fields anat, func, and subject_id
    for each preprocessed subject

    """
    exclusions = [] if exclusions is None else exclusions

    # glob for subject ids
    subject_ids = [
        os.path.basename(x)
        for x in glob.glob(os.path.join(data_dir, subject_id_wildcard))]
    model_dirs = glob.glob(os.path.join(
        data_dir, subject_ids[0], 'model', '*'))
    session_ids = [
        os.path.basename(x)
        for x in glob.glob(os.path.join(model_dirs[0], 'onsets', '*'))]
    session_ids.sort()
    subject_ids.sort()

    # producer of subject data
    def subject_factory():
        for subject_id in subject_ids:
            if subject_id in exclusions:
                continue

            # construct subject data structure
            subject_data = nipype_preproc_spm_utils.SubjectData()
            subject_data.session_id = session_ids
            subject_data.subject_id = subject_id
            subject_data.func = []

            # glob for bold data
            has_bad_sessions = False
            for session_id in subject_data.session_id:
                bold_dir = os.path.join(
                    data_dir,
                    "%s/BOLD/%s" % (subject_id, session_id))

                # extract .nii.gz to .nii
                unzip_nii_gz(bold_dir)

                # glob bold data for this session
                func = glob.glob(os.path.join(bold_dir, "bold.nii"))

                # check that this session is OK (has bold data, etc.)
                if not func:
                    has_bad_sessions = True
                    break

                subject_data.func.append(func[0])

            # exclude subject if necessary
            if has_bad_sessions:
                continue

            # glob for anatomical data
            anat_dir = os.path.join(
                data_dir,
                "%s/anatomy" % subject_id)

            # extract .nii.gz to .nii
            unzip_nii_gz(anat_dir)

            # glob anatomical data proper
            subject_data.anat = glob.glob(
                os.path.join(
                    data_dir,
                    "%s/anatomy/highres001_brain.nii" % subject_id))[0]

            # set subject output dir (all calculations for
            # this subject go here)
            subject_data.output_dir = os.path.join(
                output_dir,
                subject_id)

            yield subject_data

    # do preprocessing proper
    report_filename = os.path.join(output_dir, "_report.html")
    return nipype_preproc_spm_utils.do_subjects_preproc(
        subject_factory(),
        dataset_id=dataset_id,
        output_dir=output_dir,
        do_deleteorient=True,  # some openfmri data have garbage orientation
        do_dartel=DO_DARTEL,
        # do_cv_tc=False,
        dataset_description=DATASET_DESCRIPTION,
        report_filename=report_filename,
        do_shutdown_reloaders=True,
        )
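# Hedged follow-up sketch (not from the original script): this assumes `main`
# returns one result record per subject, exposing at least 'subject_id',
# 'anat' and 'func', as in the other snippets in this file. Paths and the
# dataset id are illustrative.
preproc_results = main("/path/to/openfmri/ds107",  # hypothetical path
                       "/path/to/output", dataset_id="ds107")
for result in preproc_results:
    print result['subject_id'], result['anat']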
"""fetch spm auditory data""" _subject_data = fetch_spm_auditory_data(DATA_DIR) subject_data = nipype_preproc_spm_utils.SubjectData() subject_data.subject_id = "sub001" subject_data.func = _subject_data["func"] subject_data.anat = _subject_data["anat"] subject_data.output_dir = os.path.join( OUTPUT_DIR, subject_data.subject_id) print subject_data.func """preprocess the data""" results = nipype_preproc_spm_utils.do_subjects_preproc( [subject_data], output_dir=OUTPUT_DIR, dataset_id="SPM single-subject auditory", dataset_description=DATASET_DESCRIPTION, do_shutdown_reloaders=False, ) """collect preprocessed data""" """collect preprocessed data""" fmri_files = results[0]['func'] anat_file = results[0]['anat'] import nibabel as ni if isinstance(fmri_files, basestring): fmri_img = ni.load(fmri_files) else: output_filename = '/tmp/spm_auditory.nii.gz'