def test_regex_ignorecase(tmp_crumb):
    """Check that regex='re.ignorecase' matches case-insensitively and that
    the regex method and ignore list survive replace() and unfold()."""
    assert not os.path.exists(tmp_crumb._path)

    # NOTE: subject folders are upper-case ('SUBJ_...') on purpose, so a
    # lower-case pattern only matches in ignore-case mode.
    values_dict = {
        'session_id': ['session_{:02}'.format(i) for i in range(2)],
        'subject_id': ['SUBJ_{:03}'.format(i) for i in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # case-sensitive 're': the lower-case pattern must match nothing
    crumb = Crumb(tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'), regex='re')  # re.match
    assert len(crumb['subject_id']) == 0
    # replace() must keep the regex method and the ignore list
    assert crumb._re_method == crumb.replace(subject_id='haensel')._re_method
    assert crumb._ignore == crumb.replace(subject_id='haensel')._ignore
    assert not crumb.unfold()

    # case-insensitive regex: the same lower-case pattern now matches
    crumb = Crumb(tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'), regex='re.ignorecase')  # re.match
    assert crumb._re_method == crumb.replace(subject_id='haensel')._re_method
    assert crumb._ignore == crumb.replace(subject_id='haensel')._ignore

    # unfold() must propagate the regex settings to the resulting crumbs
    ucrumb = crumb.unfold()[0]
    assert crumb._re_method == ucrumb._re_method
    assert crumb._ignore == ucrumb._ignore

    re_subj_ids = crumb['subject_id']
    assert re_subj_ids == ['SUBJ_{:03}'.format(i) for i in range(20, 30)]
def test_ls_and_getitem():
    """ls() output must agree with os.listdir and with __getitem__."""
    home = os.path.expanduser('~')

    # a crumb with a single open argument lists every entry in home
    crumb = Crumb(os.path.join(home, '{user_folder}'))
    entries = crumb.ls('user_folder', fullpath=False, make_crumbs=False, check_exists=False)
    assert set(entries) == set(os.listdir(home))

    # with a deeper argument, 'user_folder' only lists directories
    crumb = Crumb(os.path.join(home, '{user_folder}', '{files}'))
    entries = crumb.ls('user_folder', fullpath=False, make_crumbs=False, check_exists=False)
    expected = {name for name in os.listdir(home) if os.path.isdir(os.path.join(home, name))}
    assert set(entries) == expected

    # fullpath=True on the intermediate argument yields strings of
    # (still-crumbed, hence non-existing) paths
    paths = crumb.ls('user_folder', fullpath=True, make_crumbs=False, check_exists=False)
    assert all(isinstance(p, str) for p in paths)
    assert not any(os.path.exists(p) for p in paths)

    # listing the leaf argument yields real paths (files or symlinks)
    paths = crumb.ls('files', fullpath=True, make_crumbs=False, check_exists=False)
    assert all(isinstance(p, str) for p in paths)
    assert all(os.path.exists(p) or os.path.islink(p) for p in paths)

    # make_crumbs=True wraps results in objects exposing exists()/is_symlink()
    crumbs = crumb.ls('files', fullpath=True, make_crumbs=True, check_exists=False)
    assert all(c.exists() or c.is_symlink() for c in crumbs)

    # crumb['files'] is a shortcut for ls(fullpath=False, check_exists=True)
    listed = crumb.ls('files', fullpath=False, make_crumbs=False, check_exists=True)
    got = crumb['files']
    assert all(isinstance(p, str) for p in listed)
    assert listed == got
def test_split2():
    """split() separates the fixed path prefix from the crumbed remainder."""
    cr = Crumb('/home/hansel/data/{subj}/{session}/anat.nii')
    assert cr.split() == ('/home/hansel/data', '{subj}/{session}/anat.nii')

    # a leading crumb argument leaves no fixed prefix at all
    cr = Crumb('{base}/home/hansel/data/{subj}/{session}/anat.nii')
    assert cr.split() == ('', cr.path)

    # a path without any crumb argument has an empty remainder
    cr = Crumb('/home/hansel/data/subj/session/anat.nii')
    assert cr.split() == (cr.path, '')

    # an unbalanced brace makes the path invalid for _split
    bad_path = '/home/hansel/data/{subj_notvalidcrumb/{session}/anat.nii'
    pytest.raises(ValueError, _split, bad_path)
def test_abspath(crumb):
    """abspath() must produce an absolute, independent copy and keep the
    ignore list; `first_is_basedir` decides whether the leading crumb
    argument is replaced by the current directory.

    Fix: the original repeated the `abspath()` + two `_ignore` assertions
    verbatim twice in a row; the duplicate block is removed.
    """
    # first_is_basedir=False: the current dir is prepended to the whole path
    crumb2 = crumb.abspath(first_is_basedir=False)
    assert crumb2._path == os.path.join(os.path.abspath(os.path.curdir), crumb._path)
    assert crumb is not crumb2
    assert crumb2.isabs()
    assert crumb != crumb2
    assert 'base_dir' in set(_arg_names(crumb2.path))

    # first_is_basedir=True: the leading '{base_dir}' argument is dropped
    crumb3 = crumb.abspath(first_is_basedir=True)
    assert crumb3._path == os.path.join(os.path.abspath(os.path.curdir), crumb._path.replace('{base_dir}/', ''))
    assert crumb is not crumb3
    assert crumb3.isabs()
    assert crumb3 != crumb2

    # an already-absolute crumb stays the same path, and abspath()
    # preserves the ignore list
    home_crumb = Crumb(os.path.expanduser('~'), ignore_list=['a*'])
    assert home_crumb._abspath() == os.path.expanduser('~')

    abs_home_crumb = home_crumb.abspath()
    assert abs_home_crumb._ignore == ['a*']
    assert abs_home_crumb._ignore == home_crumb._ignore

    # with base_dir bound to an absolute dir, abspath() is an equal copy
    base_dir = BASE_DIR
    crumb2 = crumb.replace(base_dir=base_dir)
    crumbc = crumb2.abspath(first_is_basedir=False)
    assert crumbc == crumb2
    assert crumbc is not crumb2
def test_ignore_lst():
    """ignore_list patterns must be filtered out of the listings and must be
    propagated by unfold()."""
    import fnmatch

    home = os.path.expanduser('~')
    plain = Crumb(os.path.join(home, '{user_folder}', '{files}'))
    all_folders = plain['user_folder']

    # the same crumb, but hiding dot-folders
    hidden = Crumb(os.path.join(home, '{user_folder}', '{files}'), ignore_list=('.*',))
    kept_folders = hidden['user_folder']

    visible = {name for name in all_folders if not fnmatch.fnmatch(name, '.*')}
    assert set(kept_folders) == visible
    assert set(all_folders) > set(kept_folders)

    # unfold() must keep the regex method and the ignore list
    unfolded = hidden.unfold()
    assert hidden._re_method == unfolded[0]._re_method
    assert hidden._ignore == unfolded[0]._ignore
def test_regex_replace(tmp_crumb):
    """fnmatch patterns embedded in a crumb argument must filter the
    listings, also after replace()."""
    assert not os.path.exists(tmp_crumb._path)

    values_dict = {
        'session_id': ['session_{:02}'.format(i) for i in range(2)],
        'subject_id': ['subj_{:03}'.format(i) for i in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    crumb = Crumb(tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}'), regex='fnmatch')  # fnmatch
    # an inline pattern in ls() must behave like a pattern baked into the path
    assert tmp_crumb.ls('subject_id:subj_02*', make_crumbs=False) == crumb.ls('subject_id', make_crumbs=False)

    # the pattern must survive replace()
    anat_crumb = crumb.replace(modality='anat')
    assert anat_crumb.exists()

    # only the 'subj_02x' range matches the fnmatch pattern
    fn_subj_ids = {cr['subject_id'][0] for cr in anat_crumb.ls('session_id', check_exists=True)}
    assert fn_subj_ids == set(['subj_{:03}'.format(i) for i in range(20, 30)])

    # all sessions are still listed for the matching subjects
    sessions = {cr['session_id'][0] for cr in anat_crumb.ls('session_id', check_exists=True)}
    assert sessions == set(values_dict['session_id'])
def convert(self, value, param, ctx):
    """Parse `value` into a Crumb (expanding '~', hiding dot-entries);
    report a parameter failure through self.fail() when the path is not
    a valid crumb."""
    try:
        return Crumb(path.expanduser(value), ignore_list=['.*'])
    except ValueError:
        self.fail('%s is not a valid crumb path.' % value, param, ctx)
def motion_stats_sheet(motion_file_cr, crumb_fields):
    """ Return a pandas.DataFrame with some of the motion statistics obtained
    from the `statistics_files` output of the nipype.RapidArt found in the
    hansel.Crumb `motion_file_cr`.

    Parameters
    ----------
    motion_file_cr: str
        Crumb path to the motion_stats.json files.

    crumb_fields: list of str
        Crumb argument names whose values are copied into each record.
        Accepted as the string form "['a', 'b', ...]" (stripped below).

    Returns
    -------
    df: pandas.DataFrame

    Examples
    --------
    >>> motion_stats_sheet(motion_file_cr="/home/hansel/data/thomas/out/{group}/{patient_id}/{session}/rest/artifact_stats/motion_stats.json", \
    >>> crumb_fields=['group', 'patient_id', 'session'])
    """
    def get_motion_record(mtn_file_cr, crumb_fields):
        """ Return an OrderedDict of the information found in the
        `mtn_file_cr` and also `crumb_fields` Crumb argument values."""
        # fixed: close the stats file deterministically
        # (was `json.load(open(...))`, which leaks the file handle)
        with open(str(mtn_file_cr)) as stats_file:
            stats = json.load(stats_file)

        # NOTE(review): stats is assumed to be a list where index 1 holds the
        # outlier counters and index 3 the 'motion_norm' dict — this matches
        # the RapidArt stats layout used here; confirm on format changes.
        outliers = stats[1]
        motion_norm = stats[3]['motion_norm']
        motion_hdr = ['{}_motion_norm'.format(k) for k in motion_norm.keys()]

        mtn_record = OrderedDict()
        for fn in crumb_fields:
            mtn_record[fn] = mtn_file_cr[fn][0]

        mtn_record.update(outliers)

        for hdr, fn in zip(motion_hdr, motion_norm):
            mtn_record[hdr] = motion_norm[fn]

        return mtn_record

    # process the input
    motion_file_cr = Crumb(motion_file_cr)
    crumb_fields = [
        crf.strip() for crf in crumb_fields[1:-1].replace("'", "").split(',')
    ]

    # create the motion records
    motionstats = [
        get_motion_record(stats_file, crumb_fields)
        for stats_file in motion_file_cr.ls()
    ]

    # create a pandas Dataframe out of it
    df = pd.DataFrame.from_records(motionstats, columns=motionstats[0].keys())

    # return the dataframe
    return df
def test_regex(tmp_crumb):
    """'re' and 'fnmatch' regex methods must give equivalent filtering, and
    copy() must preserve the regex settings and stored patterns."""
    assert not os.path.exists(tmp_crumb.path)

    values_dict = {
        'session_id': ['session_{:02}'.format(i) for i in range(2)],
        'subject_id': ['subj_{:03}'.format(i) for i in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # python regular-expression syntax
    crumb = Crumb(tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'), regex='re')  # re.match
    re_subj_ids = crumb['subject_id']
    assert re_subj_ids == ['subj_{:03}'.format(i) for i in range(20, 30)]
    assert crumb.ls('subject_id:^subj_02.*$') == crumb.ls('subject_id')

    # fnmatch (shell-wildcard) syntax must select the same subjects
    crumb = Crumb(tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}'), regex='fnmatch')  # fnmatch
    fn_subj_ids = crumb['subject_id']
    assert fn_subj_ids == re_subj_ids

    # an inline pattern in ls() must not modify the stored patterns
    cr_bkp = crumb.copy()
    assert crumb.ls('subject_id:subj_02*') == crumb.ls('subject_id')
    assert crumb['subject_id'][0] == crumb.get_first('subject_id')
    assert crumb.patterns['subject_id'] == cr_bkp.patterns['subject_id']

    assert not crumb.ls('subject_id:subj_03*') == crumb.ls('subject_id')
    assert crumb.patterns['subject_id'] == cr_bkp.patterns['subject_id']

    # an unknown regex method name must raise
    pytest.raises(ValueError, Crumb, tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}'), regex='hansel')

    # copy() must carry over the regex method, its args and the patterns
    crumb2 = Crumb.copy(crumb)
    assert crumb2._re_method == crumb._re_method
    assert crumb2._re_args == crumb._re_args
    assert crumb2.patterns == crumb.patterns
    assert len(crumb2.patterns) == 1
    assert 'subject_id' in crumb2.patterns.keys()
def test_ls3():
    """ls() on a crumb rooted at a regular file must return nothing."""
    from glob import glob

    home = os.path.expanduser('~')
    regular_files = [p for p in glob(os.path.join(home, '*')) if os.path.isfile(p)]

    # build a crumb whose "base dir" is actually a file, not a folder
    crumb = Crumb(os.path.join(regular_files[0], '{user_folder}', '{files}'))
    assert not crumb.ls('user_folder')
    assert not crumb.ls('files')
def test_ls_raises():
    """ls() must raise on unknown, leading, or empty argument names."""
    crumb = Crumb(os.path.join('{home}', '{user_folder}'))

    # unknown argument name
    pytest.raises(KeyError, crumb.ls, 'hansel')
    # the first (root) open argument cannot be listed
    pytest.raises(NotImplementedError, crumb.ls, 'home')

    crumb['home'] = os.path.expanduser('~')
    # an empty argument name with fullpath=False is invalid
    pytest.raises(ValueError, crumb.ls, '', fullpath=False)
def clinical_pype(ctx, wf_name="spm_anat_preproc", base_dir="", cache_dir="", output_dir="",
                  settings_file='', plugin="MultiProc", n_cpus=4):
    """ Run the basic clinical pipeline.

    Parameters
    ----------
    wf_name: str
    base_dir: str
    cache_dir: str
    output_dir: str
    settings_file: str
    plugin: str
    n_cpus: int
    """
    # fixed: was `from neuro_neuro_pypes.datasets import ...`, a typo for the
    # `neuro_pypes` package used by every other function in this module
    from neuro_pypes.datasets import clinical_crumb_workflow

    # clinical layout: the tree is organized by {year}/{subject_id}/{session_id}
    data_path = os.path.join(os.path.expanduser(base_dir), '{year}', '{subject_id}', '{session_id}', '{image}')
    data_crumb = Crumb(data_path, ignore_list=['.*'])
    atlas_file = HAMM_MNI

    wf = clinical_crumb_workflow(
        wf_name=wf_name,
        data_crumb=data_crumb,
        cache_dir=os.path.abspath(os.path.expanduser(cache_dir)) if cache_dir else '',
        output_dir=os.path.abspath(os.path.expanduser(output_dir)) if output_dir else '',
        config_file=settings_file,
        params={'atlas_file': atlas_file},
    )

    # single-cpu runs fall back to the serial (no-plugin) executor
    if n_cpus > 1:
        run_wf(wf, plugin=plugin, n_cpus=n_cpus)
    else:
        run_wf(wf, plugin=None)
def test_abspath2():
    """Real test with the user folder: abspath() must resolve a relative
    crumb against the current working directory and keep the ignore list.

    Fix: restore the original working directory in a `finally` block, so a
    failing assertion no longer leaks the chdir into later tests.
    """
    import getpass
    username = getpass.getuser()
    user_folder = os.path.join('{base}', username)

    old_dir = os.getcwd()
    # work from the parent of the home dir so '{base}' resolves to it
    os.chdir(os.path.join(os.path.expanduser('~'), '..'))
    try:
        home_crumb = Crumb(user_folder, ignore_list=['a*'])
        assert home_crumb._abspath(first_is_basedir=True) == os.path.expanduser('~')

        abs_home_crumb = home_crumb.abspath()
        assert abs_home_crumb._ignore == ['a*']
        assert abs_home_crumb._ignore == home_crumb._ignore
    finally:
        os.chdir(old_dir)
def test_regex_replace2(tmp_crumb):
    """set_pattern()/clear_pattern() must behave like a pattern written
    directly in the crumb path."""
    assert not os.path.exists(tmp_crumb.path)

    values_dict = {
        'session_id': ['session_{:02}'.format(i) for i in range(2)],
        'subject_id': ['subj_{:03}'.format(i) for i in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # a crumb with the pattern
    crumb = Crumb(tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}'), regex='fnmatch')  # fnmatch

    # a crumb without the pattern, the pattern is added later
    crumb2 = Crumb(tmp_crumb.path, regex='fnmatch')
    crumb2.set_pattern('subject_id', 'subj_02*')
    assert crumb['subject_id'] == crumb2['subject_id']

    # clearing the pattern restores the unfiltered listing
    crumb2.clear_pattern('subject_id')
    assert tmp_crumb['subject_id'] == crumb2['subject_id']
def cobre_pype(ctx, wf_name="spm_anat_rest_preproc", base_dir="", cache_dir="", output_dir="",
               settings_file="", plugin=None, n_cpus=4):
    """ Run the COBRE preprocessing pipeline.

    Parameters
    ----------
    wf_name: str
    base_dir: str
        Base path to where the data is
    cache_dir: str
    output_dir: str
    settings_file: str
    plugin: str
    n_cpus: int
    """
    from neuro_pypes.datasets import cobre_crumb_workflow

    # COBRE layout: one fixed 'session_1' level under each subject
    data_path = os.path.join(os.path.expanduser(base_dir), '{subject_id}', 'session_1', '{modality}', '{image}')
    data_crumb = Crumb(data_path, ignore_list=['.*'])

    wf = cobre_crumb_workflow(
        wf_name=wf_name,
        data_crumb=data_crumb,
        cache_dir=os.path.abspath(os.path.expanduser(cache_dir)) if cache_dir else '',
        output_dir=os.path.abspath(os.path.expanduser(output_dir)) if output_dir else '',
        config_file=settings_file,
        params={'atlas_file': HAMM_MNI},
    )
    run_wf(wf, plugin=plugin, n_cpus=n_cpus)
def test_equal_copy(crumb):
    """Crumb.copy() must create an equal but independent crumb; any change
    to args, path or ignore list must break equality."""
    crumb2 = Crumb.copy(crumb)
    assert crumb2 == crumb

    # changing an argument value breaks equality
    crumb2._argval['hansel'] = 'hello'
    assert crumb2 != crumb

    # changing the path breaks equality
    crumb2._path += '/'
    assert crumb2 != crumb

    # fixed: the original used `==` here (a no-op comparison statement)
    # where an assignment extending the path was clearly intended
    crumb2._path = os.path.join(crumb._path, '{test}')
    assert crumb2 != crumb

    crumb2._argval['hansel'] = 'hello'
    assert crumb2 != crumb

    # a different ignore_list also breaks equality
    crumb3 = Crumb(crumb.path, ignore_list=['.*'])
    assert crumb3 != crumb
def make_tree_from_crumb(base_path, crumb_path, crumb_args: [Dict, CrumbArgsSequences]):
    """Create a folder tree under `base_path` following `crumb_path`.

    Parameters
    ----------
    base_path: str
        Value for the '{base_dir}' argument of `crumb_path`.
    crumb_path: str
        Crumb path spec, expected to contain a '{base_dir}' argument.
    crumb_args: dict or list
        Values for the crumb arguments; a dict is expanded with
        ParameterGrid, a list is used as-is.
        NOTE(review): the annotation `[Dict, CrumbArgsSequences]` looks like
        it was meant to be a Union — confirm before changing it.

    Returns
    -------
    Crumb
        The crumb with `base_dir` replaced, pointing at the created tree.
    """
    crumb = Crumb(crumb_path)
    crumb2 = crumb.replace(base_dir=base_path)
    # the target tree must not exist yet
    assert not os.path.exists(crumb2._path)
    assert not crumb2.has_files()

    if isinstance(crumb_args, dict):
        values_map = list(ParameterGrid(crumb_args))
    elif isinstance(crumb_args, list):
        values_map = crumb_args
    else:
        raise TypeError(
            'Expected `crumb_args` to be dict or list, got {}.'.format(
                type(crumb_args)))

    mktree(crumb2, values_map)
    # mktree creates directories only: the base exists but holds no files
    assert os.path.exists(crumb2.split()[0])
    assert not crumb2.has_files()
    return crumb2
def test_set_patterns(tmp_crumb):
    """set_patterns() keyword form must match set_pattern() per argument."""
    assert not os.path.exists(tmp_crumb.path)

    grid = {
        'session_id': ['session_{:02}'.format(i) for i in range(2)],
        'subject_id': ['subj_{:03}'.format(i) for i in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(grid)))

    # a crumb without any pattern; patterns are added afterwards
    plain = Crumb(tmp_crumb.path, regex='fnmatch')
    other = plain.copy()

    # calling set_patterns() with no arguments is a no-op
    other.set_patterns()
    assert plain == other

    # unknown argument names are rejected
    pytest.raises(KeyError, plain.set_patterns, somekey='somevalue')

    # keyword form and per-argument form must give the same listing
    other.set_pattern('subject_id', 'subj_02*')
    plain.set_patterns(subject_id='subj_02*')
    assert plain['subject_id'] == other['subject_id']
def subj_data_from_dicoms(ctx, crumb_path, arg_name, verbose=False):
    """ Print a list of folder_name -> NUK id.
    The NUK ID is calculated from the first DICOM file found in the end of
    the `dicom_path`.

    Parameters
    ----------
    crumb_path: str
        Path with Crumbs to the DICOM files, e.g.,
        /home/hansel/data/{subj_id}/{session}/{acq}/{dcm_file}

    arg_name: str
        Name of the argument in `dicom_path` of the subj_id

    verbose: bool
        Increase log verbosity.

    Returns
    -------
    subj_data: dict of subj records
        A dict with records of the information extracted from the DICOM
        files as well as the calculated NUK Pseudonym.
    """
    if verbose:
        verbose_switch(verbose)

    # fixed: expanduser must run BEFORE abspath — the original
    # `expanduser(abspath(p))` turned a leading '~' into '<cwd>/~',
    # which expanduser then left untouched
    crumb = Crumb(os.path.abspath(os.path.expanduser(crumb_path)), ignore_list=['.*'])
    if not crumb.has_crumbs():
        raise ValueError('Expected a path with crumb arguments, e.g., '
                         '"/home/hansel/data/{group}/{sid}/{session}"')

    subj_nuks = []
    for path in crumb.ls(arg_name):
        log.info('Reading DICOMs in {}.'.format(path))

        # the fixed (non-crumbed) part of the path holds the subject folder
        subj_path = path.split()[0]
        subj = _read_dcm_until_valid(subj_path)
        if subj is None:
            log.info('Could not find a valid DICOM in {}.'.format(subj_path))
        else:
            subj_nuks.append(subj)

    return subj_nuks
import os.path as path

from hansel import Crumb

from neuro_pypes.datasets import cobre_crumb_workflow
from neuro_pypes.run import run_debug

# we downloaded the database in:
base_dir = '/home/louis/Downloads/FMRI/Baltimore_2'
cobre_tree = path.join('{subject_id}', '{modality}', '{image}')

# we define the database tree
cobre_crumb = Crumb(path.join(base_dir, cobre_tree), ignore_list=['.*'])

# output and working dir
output_dir = path.join(path.dirname(base_dir), 'out')
cache_dir = path.join(path.dirname(base_dir), 'wd')

# we have a configuration file in:
config_file = path.join(path.dirname(base_dir), 'pypes_config.yml')

# we choose what pipeline set we want to run.
# the choices are: 'spm_anat_preproc', 'spm_rest_preproc'
wf_name = 'spm_anat_preproc'  # for MPRAGE and rs-fMRI preprocessing

# instantiate the workflow
wf = cobre_crumb_workflow(wf_name=wf_name,
                          data_crumb=cobre_crumb,
                          cache_dir=cache_dir,
                          output_dir=output_dir,
                          config_file=config_file)
# run it
# NOTE(review): `run_debug` is imported but the run call is not visible in
# this chunk — presumably `run_debug(wf)` follows; confirm downstream.
def crumb():
    """pytest fixture: a Crumb with every argument open, rooted at '{base_dir}'."""
    yield Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
def dcm2nii(ctx, input_crumb_path, output_dir, regex='fnmatch', ncpus=3):
    """ Convert all DICOM files within `input_crumb_path` into NifTI in `output_folder`.

    Will copy only the NifTI files reoriented by MRICron's dcm2nii command.

    Will rename the NifTI files that are matched with recognized modalities
    to the short modality name from config.ACQ_PATTERNS.

    Parameters
    ----------
    input_crumb_path: str
        A crumb path str indicating the whole path until the DICOM files.
        Example: '/home/hansel/data/{group}/{subj_id}/{session_id}/{acquisition}/{dcm_file}

        The crumb argument just before the last one will be used as
        folder container reference for the DICOM series.

    output_dir: str
        The root folder path where to save the tree of nifti files.
        Example: '/home/hansel/nifti'
        This function will create the same tree as the crumbs in
        input_crumb_path, hence for the example above the output would
        have the following structure:
        '/home/hansel/nifti/{group}/{subj_id}/{session_id}/{nifti_file}'

        Where {nifti_file} will take the name from the {acquisition} or
        from the patterns in ACQ_PATTERNS in `config.py` file.

    regex: str
        The regular expression syntax you may want to set in the Crumbs.
        See hansel.Crumb documentation for this.

    ncpus: int
        this says the number of processes that will be launched for
        dcm2nii in parallel.
    """
    from boyle.dicom.convert import convert_dcm2nii

    input_dir = os.path.expanduser(input_crumb_path)
    output_dir = os.path.expanduser(output_dir)

    if not os.path.exists(output_dir):
        log.info('Creating output folder {}.'.format(output_dir))
        os.makedirs(output_dir)
    else:
        log.info('Output folder {} already exists, this will overwrite/merge '
                 'whatever is inside.'.format(output_dir))

    input_dir = Crumb(input_dir, regex=regex, ignore_list=['.*'])
    if not input_dir.has_crumbs():
        raise ValueError(
            'I am almost sure that this cannot work if you do not '
            'use crumb arguments in the input path, got {}.'.format(input_dir))

    # the second-to-last open argument names the DICOM series folder;
    # the last one is the DICOM file itself
    acq_folder_arg, last_in_arg = tuple(input_dir.all_args())[-2:]
    out_arg_names = [
        '{' + arg + '}' for arg in tuple(input_dir.all_args())[:-1]
    ]
    # mirror the input tree (minus the file level) under output_dir
    output_dir = Crumb(os.path.join(output_dir, *out_arg_names),
                       regex=regex, ignore_list=['.*'])

    # build (src_folder, dst_folder, dst_filename) triples per acquisition
    src_dst = []
    acquisitions = input_dir.ls(acq_folder_arg, make_crumbs=True)
    for acq in acquisitions:
        out_args = acq.arg_values.copy()
        acq_out = output_dir.replace(**out_args)

        out_dir = os.path.dirname(acq_out.path)
        out_file = os.path.basename(acq_out.path) + '.nii.gz'
        os.makedirs(out_dir, exist_ok=True)

        src_dst.append((acq.split()[0], out_dir, out_file))

    # fan the conversions out over a process pool when ncpus > 1
    if ncpus > 1:
        import multiprocessing as mp
        pool = mp.Pool(processes=ncpus)
        results = [
            pool.apply_async(convert_dcm2nii, args=(dr, ss, dst))
            for dr, ss, dst in src_dst
        ]
        _ = [p.get() for p in results]
    else:
        _ = [convert_dcm2nii(path, sess, dst) for path, sess, dst in src_dst]
def motion_stats_sheet(ctx, motion_file_cr, crumb_fields, out_path):
    """ Create in `out_path` an Excel spreadsheet with some of the motion
    statistics obtained from the `statistics_files` output of the
    nipype.RapidArt found in the hansel.Crumb `motion_file_cr`.

    Parameters
    ----------
    motion_file_cr: str

    crumb_fields: list of str
        Accepted as the string form "['a', 'b', ...]" (stripped below).

    out_path: str

    Examples
    --------
    >>> inv motion_stats_sheet \
    >>> --motion-file-cr "/home/hansel/data/out/{group}/{patient_id}/{session}/rest/artifact_stats/motion_stats.json" \
    >>> --crumb-fields "['group', 'patient_id', 'session']" \
    >>> --out-path "/home/hansel/data/motion_stats.xls"
    """
    import json
    from collections import OrderedDict

    from hansel import Crumb

    def get_motion_record(mtn_file_cr, crumb_fields):
        """ Return an OrderedDict of the information found in the
        `mtn_file_cr` and also `crumb_fields` Crumb argument values."""
        # fixed: close the stats file deterministically
        # (was `json.load(open(...))`, which leaks the file handle)
        with open(str(mtn_file_cr)) as stats_file:
            stats = json.load(stats_file)

        # NOTE(review): stats is assumed to be a list where index 1 holds the
        # outlier counters and index 3 the 'motion_norm' dict — this matches
        # the RapidArt stats layout used here; confirm on format changes.
        outliers = stats[1]
        motion_norm = stats[3]['motion_norm']
        motion_hdr = ['{}_motion_norm'.format(k) for k in motion_norm.keys()]

        mtn_record = OrderedDict()
        for fn in crumb_fields:
            mtn_record[fn] = mtn_file_cr[fn][0]

        mtn_record.update(outliers)

        for hdr, fn in zip(motion_hdr, motion_norm):
            mtn_record[hdr] = motion_norm[fn]

        return mtn_record

    # process the input
    motion_file_cr = Crumb(motion_file_cr)
    crumb_fields = [
        crf.strip() for crf in crumb_fields[1:-1].replace("'", "").split(',')
    ]

    # create the motion records
    motionstats = [
        get_motion_record(stats_file, crumb_fields)
        for stats_file in motion_file_cr.ls()
    ]

    # create a pandas Dataframe out of it
    df = pd.DataFrame.from_records(motionstats, columns=motionstats[0].keys())

    # save it into an excel file
    df.to_excel(out_path)
# FreeSurfer - Specify the location of the freesurfer folder from nipype.interfaces.freesurfer import FSCommand fs_dir = '/volume/DTI/freesurfer' FSCommand.set_default_subjects_dir(fs_dir) # root path to my data base_dir = "/volume/DTI/Data" # the configuration file path config_file = os.path.join(os.path.dirname(base_dir), 'pypes_config.yml') # define the Crumb filetree of my image database data_path = os.path.join(base_dir, "{subject_id}", "{modality}", "{image}") # create the filetree Crumb object data_crumb = Crumb(data_path, ignore_list=[".*"]) # the different worflows that I will use with any given name attach_functions = { "spm_anat_preproc": attach_spm_anat_preprocessing, "spm_fsl_dti_preprocessing": attach_spm_fsl_dti_preprocessing, } # the specific parts of the `data_crumb` that define a given modality. # **Note**: the key values of this `crumb_arguments` must be the same as expected # in the functions in `attach_functions`. crumb_arguments = { 'anat': [('modality', 'anat_1'), ('image', 'mprage.nii.gz')], 'diff': [('modality', 'diff_1'), ('image', 'DTI.nii')], 'bval': [('modality', 'diff_1'), ('image', 'DTI.bval')], 'bvec': [('modality', 'diff_1'), ('image', 'DTI.bvec')],
def run_canica(ctx, input_crumb, output_dir, cache_dir="", mask_file="", algorithm='canica',
               comps=30, smooth_fwhm=8, wf_name="", settings_file=""):
    """ Perform ICA (CanICA or DictLearning) on the files given by `input_crumb`.

    Parameters
    ----------
    input_crumb: str
        Crumb path that will give a list of the input files for ICA.
        The last open argument and its pattern of the `input_crumb` will be
        used as a reference for the input image file for the ICA.
        So, put a crumb argument with fixed expression in the basename of
        the path, e.g.:
        `/home/hansel/cobre/{sid}/session_0/{img:rest.nii.gz}`.

    output_dir: str
        The results will be stored here.

    cache_dir: str
        Nipype working directory; defaults to `<output_dir>/.pypes_cache`.

    mask_file: str
        Path to a mask file to select the image regions to analyse.
        This file must have the same dimensions as all the files listed
        from `input_crumb`.

    algorithm: str
        Name of the ICA algorithm. Choices: 'canica', 'dictlearning'

    comps: int
        Number of components to extract from the ICA.

    smooth_fwhm: int
        Smoothing kernel FWHM set in the ICA configuration.

    wf_name: str
        Workflow name; defaults to the algorithm name.

    settings_file: str
        Optional configuration file applied before building the workflow.
    """
    from functools import partial

    from neuro_pypes.config import update_config
    from neuro_pypes.io import build_crumb_workflow
    from neuro_pypes.ica import attach_concat_canica

    # set the configuration parameters
    if settings_file:
        update_config(settings_file)

    # expanduser in inputs paths:
    cache_dir = os.path.expanduser(cache_dir)
    output_dir = os.path.expanduser(output_dir)

    if not cache_dir:
        cache_dir = os.path.join(output_dir, '.pypes_cache')

    # base folder depending if using MR-PET pipeline or PET-only
    data_crumb = Crumb(input_crumb, ignore_list=['.*'])

    # more configs
    if not wf_name:
        wf_name = algorithm

    if comps:
        update_config({wf_name + '_ica.n_components': comps})
    update_config({wf_name + '_ica.algorithm': algorithm})
    update_config({wf_name + '_ica.mask': mask_file})
    update_config({wf_name + '_ica.smoothing_fwhm': smooth_fwhm})
    update_config({wf_name + '_ica.do_cca': True})
    update_config({wf_name + '_ica.standardize': True})
    update_config({wf_name + '_ica.n_init': 20})
    update_config({wf_name + '_ica.n_jobs': -1})
    update_config({'plot_ica.bg_img': SPM_CANONICAL_BRAIN_2MM})

    # the input folder and files: the last open argument selects the image
    files_crumb_args = {}
    _, arg_name = data_crumb._last_open_arg()
    files_crumb_args['input_img'] = [(arg_name, data_crumb.patterns.get(arg_name, ""))]

    kwargs = dict()
    kwargs['input_connection'] = 'input_img'
    kwargs['input_node'] = 'selectfiles'

    # build the workflow
    wf = build_crumb_workflow(
        {wf_name: partial(attach_concat_canica, **kwargs)},
        data_crumb=data_crumb,
        in_out_kwargs=files_crumb_args,
        output_dir=output_dir,
        cache_dir=cache_dir,
    )
    # ICA results are plotted/saved by the ICA nodes themselves
    wf.remove_nodes([wf.get_node('datasink')])

    run_wf(wf)
def rename_to_nukid(crumb_path, arg_name, subj_data, verbose_only=False):
    """ Rename the folders at the `arg_name` level using `subj_data` records.
    Will rename from subj_data['DCM Folder'] to subj_data['NUK Pseudonym'].

    Parameters
    ----------
    crumb_path: str
        Path with Crumbs to the DICOM files, e.g., /home/hansel/data/{subj_id}

    arg_name: str
        Name of the argument in `dicom_path` of the subject identification.
        These names should be the same as the ones in 'DCM Folder' value.

    subj_data: pandas.DataFrame or list of records
        Records holding the 'DCM Folder' and 'nukid' values.

    verbose_only: bool
        If True, only log the renames without performing them.

    Returns
    -------
    src_dsts: list of 2-tuples of str
    """
    def rename_many(src_dsts, verbose_only=False):
        """ For each 2-tuple in src_dsts of file/folder paths will rename
        the first element to the second.

        Parameters
        ----------
        src_dsts : list of 2-tuple

        verbose_only: bool
            Will not perform the operation will only print them.
        """
        for (src, dst) in src_dsts:
            if not os.path.exists(src):
                raise IOError('Could not find source file {}.'.format(src))

            if os.path.exists(dst):
                if src == dst:
                    # nothing to do for identical source and destination
                    continue
                else:
                    raise IOError('Destination path {} already exists.'.format(dst))

            log.info('mv {} -> {}'.format(src, dst))
            if not verbose_only:
                os.rename(src, dst)

    if isinstance(subj_data, pd.DataFrame):
        subjs = subj_data.to_records()
    else:
        subjs = subj_data

    if not Crumb.has_crumbs(Crumb(crumb_path)):
        raise ValueError('Expected a path with crumb arguments, e.g., '
                         '"/home/hansel/data/{group}/{sid}/{session}"')

    # fixed: expanduser must run BEFORE abspath — the original
    # `expanduser(abspath(p))` turned a leading '~' into '<cwd>/~',
    # which expanduser then left untouched
    crumb = Crumb(os.path.abspath(os.path.expanduser(crumb_path)), ignore_list=['.*'])

    src_dsts = []
    for subj in subjs:
        src_crs = crumb.replace(**{arg_name: subj['DCM Folder']}).unfold()
        for src_cr in src_crs:
            dst_args = src_cr.arg_values.copy()
            dst_args[arg_name] = subj['nukid']
            dst_cr = crumb.replace(**dst_args)

            if not os.path.exists(src_cr.path):
                raise IOError('Could not find folder {} for subject {}.'.format(src_cr, subj))

            # the destination must be a concrete path, with no open arguments
            if Crumb.has_crumbs(dst_cr.path):
                raise KeyError('The destination path should be fully specified, got {}.'.format(dst_cr))

            src_dsts.append((src_cr.path, dst_cr.path))

    rename_many(src_dsts=src_dsts, verbose_only=verbose_only)
    return src_dsts
def tmp_crumb(base_dir):
    """pytest fixture: the standard crumb layout with '{base_dir}' bound to
    the (temporary) `base_dir` fixture value."""
    crumb = Crumb(
        "{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    yield crumb.replace(base_dir=base_dir)