def test_abspath(crumb):
    """`Crumb.abspath` returns a new absolute crumb and keeps the ignore list."""
    # Keep `{base_dir}` open: the current directory is prepended to the path.
    kept = crumb.abspath(first_is_basedir=False)
    expected_kept = op.join(op.abspath(op.curdir), crumb._path)
    assert kept._path == expected_kept
    assert crumb is not kept
    assert kept.isabs()
    assert crumb != kept
    assert 'base_dir' in set(_arg_names(kept.path))

    # Treat `{base_dir}` as the base directory: it is dropped from the path.
    swapped = crumb.abspath(first_is_basedir=True)
    expected_swapped = op.join(op.abspath(op.curdir),
                               crumb._path.replace('{base_dir}/', ''))
    assert swapped._path == expected_swapped
    assert crumb is not swapped
    assert swapped.isabs()
    assert swapped != kept

    # The ignore list must survive the conversion (checked on two calls).
    home_crumb = Crumb(op.expanduser('~'), ignore_list=['a*'])
    assert home_crumb._abspath() == op.expanduser('~')

    for _ in range(2):
        abs_home_crumb = home_crumb.abspath()
        assert abs_home_crumb._ignore == ['a*']
        assert abs_home_crumb._ignore == home_crumb._ignore

    # A crumb whose base dir is already filled equals its absolute version.
    filled = crumb.replace(base_dir=BASE_DIR)
    filled_abs = filled.abspath(first_is_basedir=False)
    assert filled_abs == filled
    assert filled_abs is not filled
def test_regex_ignorecase(tmp_crumb):
    """`regex='re'` is case sensitive, `regex='re.ignorecase'` is not."""
    assert not os.path.exists(tmp_crumb._path)

    subjects = ['SUBJ_{:03}'.format(num) for num in range(100)]
    values_dict = {
        'session_id': ['session_{:02}'.format(num) for num in range(2)],
        'subject_id': subjects,
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    settings = ('_re_method', '_ignore')

    # re.match, case sensitive: the lower-case pattern matches no folder
    crumb = Crumb(
        tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'),
        regex='re',
    )
    assert len(crumb['subject_id']) == 0
    for attr in settings:
        assert getattr(crumb, attr) == getattr(crumb.replace(subject_id='haensel'), attr)
    assert not crumb.unfold()

    # re.match, case insensitive: the same pattern now finds SUBJ_020..SUBJ_029
    crumb = Crumb(
        tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'),
        regex='re.ignorecase',
    )
    for attr in settings:
        assert getattr(crumb, attr) == getattr(crumb.replace(subject_id='haensel'), attr)

    ucrumb = crumb.unfold()[0]
    for attr in settings:
        assert getattr(crumb, attr) == getattr(ucrumb, attr)

    re_subj_ids = crumb['subject_id']
    assert re_subj_ids == subjects[20:30]
def tmp_tree(request):
    """Build a temporary crumb folder tree and return it with its values.

    A fresh temporary directory is created, filled with the cartesian
    product of `values_dict` through `mktree`, and removed again when the
    fixture is torn down.

    Parameters
    ----------
    request:
        The fixture request object; only `request.addfinalizer` is used.

    Returns
    -------
    crumb2, values_dict: tuple
        The crumb with `base_dir` replaced by the temporary directory and the
        dictionary of argument values used to build the tree.
    """
    import shutil

    crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    base_dir = tempfile.mkdtemp(prefix="crumbtest_")
    crumb2 = crumb.replace(base_dir=base_dir)

    def fin():
        print("teardown tmp_tree")
        # remove the generated tree so repeated runs do not pile up temp folders
        shutil.rmtree(base_dir, ignore_errors=True)

    request.addfinalizer(fin)

    assert not op.exists(crumb2._path)
    assert not crumb2.has_files()

    values_dict = {
        "session_id": ["session_{:02}".format(i) for i in range(2)],
        "subject_id": ["subj_{:03}".format(i) for i in range(3)],
        "modality": ["anat"],
        "image": ["mprage1.nii", "mprage2.nii", "mprage3.nii"],
    }

    mktree(crumb2, list(ParameterGrid(values_dict)))

    assert op.exists(crumb2.split()[0])
    assert not crumb2.has_files()

    return crumb2, values_dict  # provide the fixture value
def test_regex_replace(tmp_crumb):
    """An fnmatch pattern embedded in the path filters the listed values."""
    assert not os.path.exists(tmp_crumb._path)

    session_names = ['session_{:02}'.format(num) for num in range(2)]
    values_dict = {
        'session_id': session_names,
        'subject_id': ['subj_{:03}'.format(num) for num in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # fnmatch
    pattern_path = tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}')
    crumb = Crumb(pattern_path, regex='fnmatch')

    inline_listing = tmp_crumb.ls('subject_id:subj_02*', make_crumbs=False)
    assert inline_listing == crumb.ls('subject_id', make_crumbs=False)

    anat_crumb = crumb.replace(modality='anat')
    assert anat_crumb.exists()

    fn_subj_ids = set()
    for found in anat_crumb.ls('session_id', check_exists=True):
        fn_subj_ids.add(found['subject_id'][0])
    assert fn_subj_ids == {'subj_{:03}'.format(num) for num in range(20, 30)}

    sessions = set()
    for found in anat_crumb.ls('session_id', check_exists=True):
        sessions.add(found['session_id'][0])
    assert sessions == set(values_dict['session_id'])
def test_abspath(crumb):
    """`Crumb.abspath` returns a new absolute crumb and keeps the ignore list."""
    # Keep `{base_dir}` open: the current directory is prepended to the path.
    kept = crumb.abspath(first_is_basedir=False)
    expected_kept = os.path.join(os.path.abspath(os.path.curdir), crumb._path)
    assert kept._path == expected_kept
    assert crumb is not kept
    assert kept.isabs()
    assert crumb != kept
    assert 'base_dir' in set(_arg_names(kept.path))

    # Treat `{base_dir}` as the base directory: it is dropped from the path.
    swapped = crumb.abspath(first_is_basedir=True)
    expected_swapped = os.path.join(os.path.abspath(os.path.curdir),
                                    crumb._path.replace('{base_dir}/', ''))
    assert swapped._path == expected_swapped
    assert crumb is not swapped
    assert swapped.isabs()
    assert swapped != kept

    # The ignore list must survive the conversion (checked on two calls).
    home_crumb = Crumb(os.path.expanduser('~'), ignore_list=['a*'])
    assert home_crumb._abspath() == os.path.expanduser('~')

    for _ in range(2):
        abs_home_crumb = home_crumb.abspath()
        assert abs_home_crumb._ignore == ['a*']
        assert abs_home_crumb._ignore == home_crumb._ignore

    # A crumb whose base dir is already filled equals its absolute version.
    filled = crumb.replace(base_dir=BASE_DIR)
    filled_abs = filled.abspath(first_is_basedir=False)
    assert filled_abs == filled
    assert filled_abs is not filled
def test_ls_and_getitem():
    """`Crumb.ls` and `Crumb.__getitem__` list the user's home folder consistently."""
    home = os.path.expanduser('~')
    names_only = dict(fullpath=False, make_crumbs=False, check_exists=False)
    full_strings = dict(fullpath=True, make_crumbs=False, check_exists=False)

    # one open argument: every entry of the home folder is listed
    crumb = Crumb(os.path.join(home, '{user_folder}'))
    lst = crumb.ls('user_folder', **names_only)
    assert set(lst) == set(os.listdir(home))

    # two open arguments: only the directories can be values of the first one
    crumb = Crumb(os.path.join(home, '{user_folder}', '{files}'))
    lst = crumb.ls('user_folder', **names_only)
    home_dirs = {entry for entry in os.listdir(home)
                 if os.path.isdir(os.path.join(home, entry))}
    assert set(lst) == home_dirs

    flst = crumb.ls('user_folder', **full_strings)
    assert all(isinstance(item, str) for item in flst)
    assert all(not os.path.exists(item) for item in flst)

    flst = crumb.ls('files', **full_strings)
    assert all(isinstance(item, str) for item in flst)
    assert all(os.path.exists(item) or os.path.islink(item) for item in flst)

    flst = crumb.ls('files', fullpath=True, make_crumbs=True, check_exists=False)
    assert all(item.exists() or item.is_symlink() for item in flst)

    # item access must be the same as an existence-checked listing of names
    flst1 = crumb.ls('files', fullpath=False, make_crumbs=False, check_exists=True)
    flst2 = crumb['files']
    assert all(isinstance(item, str) for item in flst1)
    assert flst1 == flst2
def test_group_pattern():
    """`groupby_pattern` groups the `image` values by fnmatch pattern."""
    template = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{image}")
    tree_root = tempfile.mkdtemp(prefix="crumbtest1_")
    tmp_crumb1 = template.replace(base_dir=tree_root)

    assert not op.exists(tmp_crumb1._path)
    assert not tmp_crumb1.has_files()

    values_dict1 = {
        "session_id": ["session_{:02}".format(num) for num in range(2)],
        "subject_id": ["subj_{:03}".format(num) for num in range(3)],
        "image": ["mprage.nii", "pet.nii", "rest.nii", "remaining"],
    }
    mktree(tmp_crumb1, list(ParameterGrid(values_dict1)))

    patterns = {"anat": "mprage*", "pet": "pet*", "rest": "rest*"}
    matches = groupby_pattern(tmp_crumb1, "image", patterns)

    assert patterns.keys() == matches.keys()

    # 2 sessions x 3 subjects give 6 matching paths for each group
    for group_name, group_paths in matches.items():
        assert len(group_paths) == 6
        for matched in group_paths:
            assert isinstance(matched, Crumb)
            assert patterns[group_name] in matched.patterns.values()
def test_regex_replace(tmp_crumb):
    """An fnmatch pattern embedded in the path filters the listed values."""
    assert not op.exists(tmp_crumb._path)

    session_names = ['session_{:02}'.format(num) for num in range(2)]
    values_dict = {
        'session_id': session_names,
        'subject_id': ['subj_{:03}'.format(num) for num in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # fnmatch
    pattern_path = tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}')
    crumb = Crumb(pattern_path, regex='fnmatch')

    inline_listing = tmp_crumb.ls('subject_id:subj_02*', make_crumbs=False)
    assert inline_listing == crumb.ls('subject_id', make_crumbs=False)

    anat_crumb = crumb.replace(modality='anat')
    assert anat_crumb.exists()

    fn_subj_ids = set()
    for found in anat_crumb.ls('session_id', check_exists=True):
        fn_subj_ids.add(found['subject_id'][0])
    assert fn_subj_ids == {'subj_{:03}'.format(num) for num in range(20, 30)}

    sessions = set()
    for found in anat_crumb.ls('session_id', check_exists=True):
        sessions.add(found['session_id'][0])
    assert sessions == set(values_dict['session_id'])
def motion_stats_sheet(motion_file_cr, crumb_fields):
    """ Return a pandas.DataFrame with some of the motion statistics obtained
    from the `statistics_files` output of the nipype.RapidArt found in the
    hansel.Crumb `motion_file_cr`.

    Parameters
    ----------
    motion_file_cr: str
        Crumb path to the motion statistics JSON files.

    crumb_fields: list of str or str
        Names of the crumb arguments whose values are added to each record.
        The string form of a list, e.g. "['group', 'session']", is accepted
        as well.

    Returns
    -------
    df: pandas.DataFrame
        One row per listed statistics file. If no file is found, an empty
        DataFrame with the `crumb_fields` columns is returned.

    Examples
    --------
    >>> motion_stats_sheet(
    ...     motion_file_cr="/home/hansel/data/thomas/out/{group}/{patient_id}/{session}/rest/artifact_stats/motion_stats.json",
    ...     crumb_fields=['group', 'patient_id', 'session'])
    """
    def get_motion_record(mtn_file_cr, crumb_fields):
        """ Return an OrderedDict of the information found in the `mtn_file_cr`
        and also `crumb_fields` Crumb argument values."""
        # close the file handle deterministically instead of leaking it
        with open(str(mtn_file_cr)) as stats_fp:
            stats = json.load(stats_fp)

        outliers = stats[1]
        motion_norm = stats[3]['motion_norm']

        mtn_record = OrderedDict()
        for fn in crumb_fields:
            mtn_record[fn] = mtn_file_cr[fn][0]

        mtn_record.update(outliers)

        for key, value in motion_norm.items():
            mtn_record['{}_motion_norm'.format(key)] = value

        return mtn_record

    # process the input
    motion_file_cr = Crumb(motion_file_cr)

    if isinstance(crumb_fields, str):
        # string form of a list: drop the brackets and the quotes
        field_names = crumb_fields[1:-1].replace("'", "").split(',')
    else:
        field_names = [str(crf) for crf in crumb_fields]
    crumb_fields = [crf.strip() for crf in field_names if crf.strip()]

    # create the motion records
    motionstats = [get_motion_record(stats_file, crumb_fields)
                   for stats_file in motion_file_cr.ls()]

    # create a pandas Dataframe out of it; without records only the
    # crumb fields are known as columns
    columns = list(motionstats[0].keys()) if motionstats else crumb_fields
    df = pd.DataFrame.from_records(motionstats, columns=columns)

    # return the dataframe
    return df
def test_ls3():
    """Listing below a regular file (used as if it were a folder) gives nothing."""
    from glob import glob

    home = os.path.expanduser('~')
    files = []
    for entry in glob(os.path.join(home, '*')):
        if os.path.isfile(entry):
            files.append(entry)

    # the first regular file of the home folder plays the role of the base dir
    crumb = Crumb(os.path.join(files[0], '{user_folder}', '{files}'))

    for arg_name in ('user_folder', 'files'):
        lst = crumb.ls(arg_name)
        assert not lst
def test_ls3():
    """Listing below a regular file (used as if it were a folder) gives nothing."""
    from glob import glob

    home = op.expanduser('~')
    files = []
    for entry in glob(op.join(home, '*')):
        if op.isfile(entry):
            files.append(entry)

    # the first regular file of the home folder plays the role of the base dir
    crumb = Crumb(op.join(files[0], '{user_folder}', '{files}'))

    for arg_name in ('user_folder', 'files'):
        lst = crumb.ls(arg_name)
        assert not lst
def mktree(crumb: hansel.Crumb, values_map: CrumbArgsSequences) -> List[str]:
    """ Create the tree of folders given the values for the crumb arguments
    of the current crumb path.

    Every item of `values_map` is validated first; the paths are only
    touched once all the items have been accepted.

    Parameters
    ----------
    crumb: Crumb

    values_map: Sequence[Sequence[2-Tuple[str, str]]] or Sequence[Dict[str, str]]
        The lists of values to substitute each crumb argument that you want.
        Do not leave dependent arguments alone.
        Example: [[('subject_id', 'pat_0001'), ('session_id', 'session_1')],
                  [('subject_id', 'pat_0002'), ('session_id', 'session_1')],
                  ....
                 ]

        Example: [{'subject_id': 'pat_0001', 'session_id': 'session_1'},
                  {'subject_id': 'pat_0002', 'session_id': 'session_1'},
                  ....
                 ]

    Returns
    -------
    paths: list of Paths
        The paths that have been created.

    Raises
    ------
    TypeError
        If `values_map` is neither None, a list nor a dict.

    ValueError
        If an item has keys that are not arguments of `crumb`.

    KeyError
        If an item leaves a parent argument of its keys unfilled.
    """
    if values_map is None:
        return [crumb.touch()]

    if not isinstance(values_map, (list, dict)):
        raise TypeError(
            "Expected keys in `values_map` to be a Sequence, got {}.".format(
                type(values_map)))

    created = []
    for position, item in enumerate(values_map):
        record = item if isinstance(item, Mapping) else dict(item)

        given_names = set(record.keys())
        if not given_names <= set(crumb.all_args()):
            raise ValueError(
                "Expected keys in `values_map` item to be a subset of {}, got {}."
                .format(crumb.all_args(), record.keys()))

        # arguments that come before the given ones and would stay open
        missing_parents = crumb._args_open_parents(list(record.keys()))
        if missing_parents:
            raise KeyError(
                "Expected `values_map` item to not leave crumbs alone,"
                " you forgot to add: {} in item {}".format(missing_parents,
                                                           position))

        created.append(crumb.replace(**record))

    for new_path in created:
        new_path.touch()

    return created
def tmp_crumb(request):
    """Return a crumb whose `base_dir` is a fresh temporary directory.

    The temporary directory, with anything created below it, is removed when
    the fixture is torn down.

    Parameters
    ----------
    request:
        The fixture request object; only `request.addfinalizer` is used.

    Returns
    -------
    crumb2: Crumb
        "{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}" with
        `base_dir` replaced by the temporary directory.
    """
    import shutil

    crumb = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")
    base_dir = tempfile.mkdtemp(prefix='crumbtest_')
    crumb2 = crumb.replace(base_dir=base_dir)

    def fin():
        print("teardown tmp_crumb")
        # remove the trees built on top of this crumb so temp folders do not pile up
        shutil.rmtree(base_dir, ignore_errors=True)

    request.addfinalizer(fin)
    return crumb2  # provide the fixture value
def test_ls_and_getitem():
    """`Crumb.ls` and `Crumb.__getitem__` list the user's home folder consistently."""
    home = op.expanduser('~')
    names_only = dict(fullpath=False, make_crumbs=False, check_exists=False)
    full_strings = dict(fullpath=True, make_crumbs=False, check_exists=False)

    # one open argument: every entry of the home folder is listed
    crumb = Crumb(op.join(home, '{user_folder}'))
    lst = crumb.ls('user_folder', **names_only)
    assert set(lst) == set(os.listdir(home))

    # two open arguments: only the directories can be values of the first one
    crumb = Crumb(op.join(home, '{user_folder}', '{files}'))
    lst = crumb.ls('user_folder', **names_only)
    home_dirs = {entry for entry in os.listdir(home)
                 if op.isdir(op.join(home, entry))}
    assert set(lst) == home_dirs

    flst = crumb.ls('user_folder', **full_strings)
    assert all(isinstance(item, string_types) for item in flst)
    assert all(not op.exists(item) for item in flst)

    flst = crumb.ls('files', **full_strings)
    assert all(isinstance(item, string_types) for item in flst)
    assert all(op.exists(item) or op.islink(item) for item in flst)

    flst = crumb.ls('files', fullpath=True, make_crumbs=True, check_exists=False)
    assert all(item.exists() or item.is_symlink() for item in flst)

    # item access must be the same as an existence-checked listing of names
    flst1 = crumb.ls('files', fullpath=False, make_crumbs=False, check_exists=True)
    flst2 = crumb['files']
    assert all(isinstance(item, str) for item in flst1)
    assert flst1 == flst2
def test_abspath2():
    """Check `abspath` against the real user folder, with an ignore list.

    The working directory is moved to the parent of the home folder, so that
    "{base}/<username>" resolves to the home folder when `{base}` is taken as
    the base directory. The previous working directory is always restored,
    also when an assertion fails, so other tests are not affected.
    """
    # do a real test with user folder and ignore_list
    import getpass

    username = getpass.getuser()
    user_folder = os.path.join('{base}', username)

    old_dir = os.getcwd()
    os.chdir(os.path.join(os.path.expanduser('~'), '..'))
    try:
        home_crumb = Crumb(user_folder, ignore_list=['a*'])
        assert home_crumb._abspath(first_is_basedir=True) == os.path.expanduser('~')

        abs_home_crumb = home_crumb.abspath()
        assert abs_home_crumb._ignore == ['a*']
        assert abs_home_crumb._ignore == home_crumb._ignore
    finally:
        os.chdir(old_dir)
def test_abspath2():
    """Check `abspath` against the real user folder, with an ignore list.

    The working directory is moved to the parent of the home folder, so that
    "{base}/<username>" resolves to the home folder when `{base}` is taken as
    the base directory. The previous working directory is always restored,
    also when an assertion fails, so other tests are not affected.
    """
    # do a real test with user folder and ignore_list
    import getpass

    username = getpass.getuser()
    user_folder = op.join('{base}', username)

    old_dir = os.getcwd()
    os.chdir(op.join(op.expanduser('~'), '..'))
    try:
        home_crumb = Crumb(user_folder, ignore_list=['a*'])
        assert home_crumb._abspath(first_is_basedir=True) == op.expanduser('~')

        abs_home_crumb = home_crumb.abspath()
        assert abs_home_crumb._ignore == ['a*']
        assert abs_home_crumb._ignore == home_crumb._ignore
    finally:
        os.chdir(old_dir)
def test_split2():
    """`Crumb.split` separates the fixed prefix from the part with arguments."""
    # arguments in the middle: split right before the first argument
    partly_open = Crumb('/home/hansel/data/{subj}/{session}/anat.nii')
    assert partly_open.split() == ('/home/hansel/data', '{subj}/{session}/anat.nii')

    # argument at the very start: there is no fixed prefix
    open_from_start = Crumb('{base}/home/hansel/data/{subj}/{session}/anat.nii')
    assert open_from_start.split() == ('', open_from_start.path)

    # no arguments: everything is the fixed prefix
    fully_defined = Crumb('/home/hansel/data/subj/session/anat.nii')
    assert fully_defined.split() == (fully_defined.path, '')

    # an unbalanced brace is rejected
    notvalid_crumb = '/home/hansel/data/{subj_notvalidcrumb/{session}/anat.nii'
    with pytest.raises(ValueError):
        _split(notvalid_crumb)
def _crumb_fill_dst(
        src_crumb: hansel.Crumb,
        dst_crumb: hansel.Crumb
) -> Iterator[Tuple[hansel.Crumb, hansel.Crumb]]:
    """ Yield `(source, destination)` pairs for every item listed by `src_crumb`.

    For each item of `src_crumb.ls()` a copy of `dst_crumb` is made and
    filled through `copy_args`. The copy must end up without open arguments,
    i.e. the arguments of the listed item must define `dst_crumb` entirely.

    Raises
    ------
    AttributeError
        If a destination still has open arguments after being filled.
    """
    for source in src_crumb.ls():
        target = dst_crumb.copy()
        copy_args(source, target)

        if not target.has_crumbs():
            yield source, target
            continue

        raise AttributeError("Destination crumb still has open arguments, "
                             "expected to fill it. Got {}.".format(str(target)))
def test_ignore_lst():
    """Entries matching the ignore list are left out of the listings."""
    import fnmatch

    home = op.expanduser('~')
    crumb_path = op.join(home, '{user_folder}', '{files}')

    # set(fnmatch.filter(crumb['user_folder'], '.*'))
    folders = Crumb(crumb_path)['user_folder']

    ign_crumb = Crumb(crumb_path, ignore_list=('.*',))
    ign_folders = ign_crumb['user_folder']

    visible = {name for name in folders if not fnmatch.fnmatch(name, '.*')}
    assert set(ign_folders) == visible
    assert set(folders) > set(ign_folders)

    # the unfolded crumbs inherit the matching method and the ignore list
    uign_crumb = ign_crumb.unfold()
    for attr in ('_re_method', '_ignore'):
        assert getattr(ign_crumb, attr) == getattr(uign_crumb[0], attr)
def test_lt(tmp_crumb):
    """Ordering of crumbs must agree with the ordering of their paths."""

    def check_not_less_than_itself(cr):
        crumb_result = cr < cr
        path_result = cr.path < cr.path
        assert not crumb_result
        assert crumb_result == path_result

    def compare_with_paths(left, right):
        crumb_result = left < right
        path_result = left._path < right._path
        assert crumb_result == path_result
        return crumb_result

    check_not_less_than_itself(tmp_crumb)

    tmp_crumb2 = Crumb.copy(tmp_crumb)
    check_not_less_than_itself(tmp_crumb2)

    tmp_crumb2 = tmp_crumb2.joinpath('hansel')
    check_not_less_than_itself(tmp_crumb2)

    # a path that sorts before the one of `tmp_crumb2`
    tmp_crumb._path = os.path.sep + 'aaalex' + tmp_crumb2._path
    assert compare_with_paths(tmp_crumb, tmp_crumb2)

    # a path that sorts after the one of `tmp_crumb2`
    tmp_crumb._path = os.path.sep + 'zealous' + tmp_crumb2._path
    assert not compare_with_paths(tmp_crumb, tmp_crumb2)

    assert tmp_crumb >= tmp_crumb2
    assert not tmp_crumb <= tmp_crumb2
    assert tmp_crumb > tmp_crumb2
    assert not tmp_crumb < tmp_crumb2
def test_ignore_lst():
    """Entries matching the ignore list are left out of the listings."""
    import fnmatch

    home = os.path.expanduser('~')
    crumb_path = os.path.join(home, '{user_folder}', '{files}')

    # set(fnmatch.filter(crumb['user_folder'], '.*'))
    folders = Crumb(crumb_path)['user_folder']

    ign_crumb = Crumb(crumb_path, ignore_list=('.*',))
    ign_folders = ign_crumb['user_folder']

    visible = {name for name in folders if not fnmatch.fnmatch(name, '.*')}
    assert set(ign_folders) == visible
    assert set(folders) > set(ign_folders)

    # the unfolded crumbs inherit the matching method and the ignore list
    uign_crumb = ign_crumb.unfold()
    for attr in ('_re_method', '_ignore'):
        assert getattr(ign_crumb, attr) == getattr(uign_crumb[0], attr)
def test_lt(tmp_crumb):
    """Ordering of crumbs must agree with the ordering of their paths."""

    def check_not_less_than_itself(cr):
        crumb_result = cr < cr
        path_result = cr.path < cr.path
        assert not crumb_result
        assert crumb_result == path_result

    def compare_with_paths(left, right):
        crumb_result = left < right
        path_result = left._path < right._path
        assert crumb_result == path_result
        return crumb_result

    check_not_less_than_itself(tmp_crumb)

    tmp_crumb2 = Crumb.copy(tmp_crumb)
    check_not_less_than_itself(tmp_crumb2)

    tmp_crumb2 = tmp_crumb2.joinpath('hansel')
    check_not_less_than_itself(tmp_crumb2)

    # a path that sorts before the one of `tmp_crumb2`
    tmp_crumb._path = op.sep + 'aaalex' + tmp_crumb2._path
    assert compare_with_paths(tmp_crumb, tmp_crumb2)

    # a path that sorts after the one of `tmp_crumb2`
    tmp_crumb._path = op.sep + 'zealous' + tmp_crumb2._path
    assert not compare_with_paths(tmp_crumb, tmp_crumb2)

    assert tmp_crumb >= tmp_crumb2
    assert not tmp_crumb <= tmp_crumb2
    assert tmp_crumb > tmp_crumb2
    assert not tmp_crumb < tmp_crumb2
def convert(self, value, param, ctx):
    """Turn `value` into a `Crumb` that ignores entries matching '.*'.

    The user folder in `value` is expanded before the crumb is built. If
    `Crumb` rejects the path with a ValueError, the failure is reported
    through `self.fail` for the given `param` and `ctx`.
    """
    try:
        return Crumb(path.expanduser(value), ignore_list=['.*'])
    except ValueError:
        self.fail('%s is not a valid crumb path.' % value, param, ctx)
def test_from_path(crumb):
    """`Crumb.from_path` builds equal but distinct crumbs from crumbs and Paths."""
    cr = Crumb.copy(crumb)
    assert cr is not crumb
    assert cr == crumb

    def check_fresh_and_equal(built):
        assert built is not crumb
        assert built == crumb
        assert built is not cr
        assert built == cr

    # from another crumb
    check_fresh_and_equal(crumb.from_path(crumb))

    # from a pathlib.Path
    check_fresh_and_equal(Crumb.from_path(Path(crumb.path)))
def test_equal_copy(crumb):
    """A copy equals its source until its values, path or ignore list change."""
    crumb2 = Crumb.copy(crumb)
    assert crumb2 == crumb

    # a different argument value breaks the equality
    crumb2._argval['hansel'] = 'hello'
    assert crumb2 != crumb

    # so does a different path
    crumb2._path += '/'
    assert crumb2 != crumb

    # assign the new path: a bare `==` here compared and discarded the result
    crumb2._path = os.path.join(crumb._path, '{test}')
    assert crumb2 != crumb

    crumb2._argval['hansel'] = 'hello'
    assert crumb2 != crumb

    # and so does a different ignore list
    crumb3 = Crumb(crumb.path, ignore_list=['.*'])
    assert crumb3 != crumb
def test_ls_raises():
    """`Crumb.ls` rejects unknown, unlistable and empty argument names."""
    crumb = Crumb(os.path.join('{home}', '{user_folder}'))

    # not an argument of the crumb
    with pytest.raises(KeyError):
        crumb.ls('hansel')

    # the first argument has no fixed folder before it to list
    with pytest.raises(NotImplementedError):
        crumb.ls('home')

    crumb['home'] = os.path.expanduser('~')

    with pytest.raises(ValueError):
        crumb.ls('', fullpath=False)
def pandas_fill_crumbs(
        df: 'pandas.DataFrame',
        crumb: hansel.Crumb,
        names_map: CrumbArgsSequences = None) -> Iterator[hansel.Crumb]:
    """ Generate copies of `crumb` filled with the values found in `df`.

    The columns of `df` are matched to the crumb arguments either by name or
    through the relation given in `names_map`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb: hansel.Crumb

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        This is a "DataFrame column name" to "crumb argument name" relation
        dictionary.
        Example: {'Subject ID': 'subject_id'}
        If None will make a dictionary from the open crumbs arguments, e.g.,
        {'subject_id': 'subject_id'}.

        The values of this dict will be used to filter the columns
        in `df` and the crumb arguments in `crumb`.

        You may need to rename the columns of `df` before using this.

    Returns
    -------
    crumbs: generator of crumbs
        Crumbs filled with the data in `df`.
    """
    if names_map is None:
        # by default every open argument is looked up under its own name
        names_map = {arg_name: arg_name for arg_name in crumb.open_args()}

    column_to_arg = names_map if isinstance(names_map, dict) else dict(names_map)

    renamed = df.pipe(_pandas_rename_cols, column_to_arg)
    records = renamed.pipe(df_to_valuesmap,
                           list(crumb.all_args()),
                           arg_names=list(column_to_arg.values()))

    for record in records:
        yield crumb.replace(**dict(record))
def test_copy(crumb):
    """`Crumb.copy` works on the class and on instances, and rejects non-crumbs."""
    from_class = Crumb.copy(crumb)
    assert crumb is not from_class
    assert crumb == from_class

    from_instance = crumb.copy()
    assert crumb is not from_instance
    assert crumb == from_instance

    assert from_class is not from_instance

    # a dict is not something that can be copied as a crumb
    with pytest.raises(TypeError):
        crumb.copy({})
def make_tree_from_crumb(base_path, crumb_path, crumb_args: [Dict, CrumbArgsSequences]):
    """Create under `base_path` the folder tree described by `crumb_path`.

    Parameters
    ----------
    base_path:
        Value for the `base_dir` argument of `crumb_path`.

    crumb_path:
        Crumb path with a `base_dir` argument.

    crumb_args: dict or list
        A dict is expanded with `ParameterGrid` into all its combinations;
        a list is passed to `mktree` as it is.

    Returns
    -------
    crumb2: Crumb
        The crumb with `base_dir` replaced by `base_path`.

    Raises
    ------
    TypeError
        If `crumb_args` is neither a dict nor a list.
    """
    crumb2 = Crumb(crumb_path).replace(base_dir=base_path)

    assert not os.path.exists(crumb2._path)
    assert not crumb2.has_files()

    if isinstance(crumb_args, dict):
        values_map = list(ParameterGrid(crumb_args))
    elif not isinstance(crumb_args, list):
        raise TypeError(
            'Expected `crumb_args` to be dict or list, got {}.'.format(
                type(crumb_args)))
    else:
        values_map = crumb_args

    mktree(crumb2, values_map)

    assert os.path.exists(crumb2.split()[0])
    assert not crumb2.has_files()

    return crumb2
def test_regex_ignorecase(tmp_crumb):
    """`regex='re'` is case sensitive, `regex='re.ignorecase'` is not."""
    assert not op.exists(tmp_crumb._path)

    subjects = ['SUBJ_{:03}'.format(num) for num in range(100)]
    values_dict = {
        'session_id': ['session_{:02}'.format(num) for num in range(2)],
        'subject_id': subjects,
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    settings = ('_re_method', '_ignore')

    # re.match, case sensitive: the lower-case pattern matches no folder
    crumb = Crumb(
        tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'),
        regex='re',
    )
    assert len(crumb['subject_id']) == 0
    for attr in settings:
        assert getattr(crumb, attr) == getattr(crumb.replace(subject_id='haensel'), attr)
    assert not crumb.unfold()

    # re.match, case insensitive: the same pattern now finds SUBJ_020..SUBJ_029
    crumb = Crumb(
        tmp_crumb.path.replace('{subject_id}', '{subject_id:^subj_02.*$}'),
        regex='re.ignorecase',
    )
    for attr in settings:
        assert getattr(crumb, attr) == getattr(crumb.replace(subject_id='haensel'), attr)

    ucrumb = crumb.unfold()[0]
    for attr in settings:
        assert getattr(crumb, attr) == getattr(ucrumb, attr)

    re_subj_ids = crumb['subject_id']
    assert re_subj_ids == subjects[20:30]
def groupby_pattern(
        crumb: hansel.Crumb,
        arg_name: str,
        groups: Dict[str, str]) -> Dict[str, List[hansel.Crumb]]:
    """Return a dictionary with the matches of `groups` values in the crumb
    argument `arg_name` in `crumb`.

    Each pattern is set on `arg_name` in turn, the crumb is listed, and the
    pattern is cleared again, also when the listing fails. Any pattern that
    `arg_name` had before the call is therefore cleared as well.

    Parameters
    ----------
    crumb: Crumb
        Crumb to the folder tree.

    arg_name: str
        Name of the crumb argument in `crumb` that must be matched with the
        values of the `groups` dict.

    groups: dict[str]->str
        A dict where the keys are group names and the values are regular
        expressions (fnmatch xor re).

    Returns
    -------
    grouped: dict[str] -> list[Crumb]
        Map of paths from groups to the corresponding path matches.
        Groups without any match are not included.

    Raises
    ------
    KeyError
        If `arg_name` is not an argument of `crumb`.
    """
    if arg_name not in crumb:
        raise KeyError('Crumb {} has no argument {}.'.format(crumb, arg_name))

    mods = defaultdict(list)
    for mod_name, pattern in groups.items():
        crumb.set_pattern(arg_name, pattern)
        try:
            paths = crumb.ls(arg_name)
        finally:
            # never leave the temporary pattern on the caller's crumb
            crumb.clear_pattern(arg_name)

        if paths:
            mods[mod_name] = paths

    return mods
def _test_crumb_copy(make_links=False):
    """Copy (or link, if `make_links`) one crumb tree into another one.

    The operation is run three times: on an empty destination, again
    without `exist_ok` (must raise FileExistsError) and again with
    `exist_ok=True`.
    """
    template = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{image}")

    # source tree
    source_root = tempfile.mkdtemp(prefix="crumb_copy_test1_")
    tmp_crumb1 = template.replace(base_dir=source_root)
    assert not op.exists(tmp_crumb1._path)
    assert not tmp_crumb1.has_files()

    values_dict1 = {
        "session_id": ["session_{:02}".format(num) for num in range(2)],
        "subject_id": ["subj_{:03}".format(num) for num in range(3)],
        "image": ["mprage.nii", "pet.nii", "rest.nii", "remaining"],
    }
    mktree(tmp_crumb1, list(ParameterGrid(values_dict1)))

    # destination tree, still empty
    target_root = tempfile.mkdtemp(prefix="crumb_copy_test2_")
    tmp_crumb2 = template.replace(base_dir=target_root)

    copy_func = crumb_link if make_links else crumb_copy

    def destination_is_complete():
        return all([item.exists() for item in tmp_crumb2.ls()])

    # make first copy
    copy_func(tmp_crumb1, tmp_crumb2)
    assert destination_is_complete()

    # copy again without exist_ok
    with pytest.raises(FileExistsError):
        copy_func(tmp_crumb1, tmp_crumb2)
    assert destination_is_complete()

    # copy again with exist_ok
    copy_func(tmp_crumb1, tmp_crumb2, exist_ok=True)
    assert destination_is_complete()

    if make_links:
        assert all([op.islink(item.path) for item in tmp_crumb2.ls()])
def test_regex_replace2(tmp_crumb):
    """A pattern set afterwards behaves like one written in the crumb path."""
    assert not op.exists(tmp_crumb.path)

    values_dict = {
        'session_id': ['session_{:02}'.format(num) for num in range(2)],
        'subject_id': ['subj_{:03}'.format(num) for num in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # a crumb with the pattern (fnmatch)
    with_pattern = Crumb(
        tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}'),
        regex='fnmatch',
    )

    # a crumb without the pattern, the pattern is added later
    late_pattern = Crumb(tmp_crumb.path, regex='fnmatch')
    late_pattern.set_pattern('subject_id', 'subj_02*')
    assert with_pattern['subject_id'] == late_pattern['subject_id']

    # without the pattern every subject is listed again
    late_pattern.clear_pattern('subject_id')
    assert tmp_crumb['subject_id'] == late_pattern['subject_id']
def test_set_patterns(tmp_crumb):
    """`set_patterns` with keywords is equivalent to `set_pattern` calls."""
    assert not os.path.exists(tmp_crumb.path)

    values_dict = {
        'session_id': ['session_{:02}'.format(num) for num in range(2)],
        'subject_id': ['subj_{:03}'.format(num) for num in range(100)],
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(values_dict)))

    # a crumb without the pattern, the pattern is added later
    crumb2 = Crumb(tmp_crumb.path, regex='fnmatch')
    crumb3 = crumb2.copy()

    # no keywords: nothing changes
    crumb3.set_patterns()
    assert crumb2 == crumb3

    # unknown argument names are rejected
    with pytest.raises(KeyError):
        crumb2.set_patterns(somekey='somevalue')

    crumb3.set_pattern('subject_id', 'subj_02*')
    crumb2.set_patterns(subject_id='subj_02*')
    assert crumb2['subject_id'] == crumb3['subject_id']
def subj_data_from_dicoms(ctx, crumb_path, arg_name, verbose=False):
    """ Return the subject records read from the DICOM files found for each
    value of `arg_name` in `crumb_path`.

    For each path listed for `arg_name`, its fixed part (the first item of
    `path.split()`) is given to `_read_dcm_until_valid`. Paths for which no
    valid DICOM is found are logged and skipped.

    Parameters
    ----------
    crumb_path: str
        Path with Crumbs to the DICOM files, e.g.,
        /home/hansel/data/{subj_id}/{session}/{acq}/{dcm_file}
        A leading `~` is expanded to the user folder.

    arg_name: str
        Name of the argument in `crumb_path` of the subj_id

    verbose: bool
        If True, `verbose_switch` is called with it.

    Returns
    -------
    subj_data: list of subj records
        The records returned by `_read_dcm_until_valid`, one for each
        listed path where a valid DICOM was found.

    Raises
    ------
    ValueError
        If `crumb_path` has no crumb arguments.
    """
    if verbose:
        verbose_switch(verbose)

    # expand `~` BEFORE making the path absolute: `abspath` would prefix the
    # current directory and hide the `~` from `expanduser`
    crumb = Crumb(os.path.abspath(os.path.expanduser(crumb_path)),
                  ignore_list=['.*'])
    if not crumb.has_crumbs():
        raise ValueError('Expected a path with crumb arguments, e.g., '
                         '"/home/hansel/data/{group}/{sid}/{session}"')

    subj_nuks = []
    for path in crumb.ls(arg_name):
        log.info('Reading DICOMs in {}.'.format(path))
        subj_path = path.split()[0]
        subj = _read_dcm_until_valid(subj_path)
        if subj is None:
            log.info('Could not find a valid DICOM in {}.'.format(subj_path))
        else:
            subj_nuks.append(subj)

    return subj_nuks
def clinical_pype(ctx, wf_name="spm_anat_preproc", base_dir="",
                  cache_dir="", output_dir="", settings_file='',
                  plugin="MultiProc", n_cpus=4):
    """ Run the basic pipeline.

    The data is looked up in
    `<base_dir>/{year}/{subject_id}/{session_id}/{image}`.

    Parameters
    ----------
    wf_name: str

    base_dir: str
        Base path to where the data is.

    cache_dir: str

    output_dir: str

    settings_file: str
        Passed to the workflow as `config_file`.

    plugin: str
        Only used if `n_cpus` > 1.

    n_cpus: int
        If not greater than 1, the workflow is run without plugin.
    """
    # the workflows live in `neuro_pypes` (module name was misspelled here)
    from neuro_pypes.datasets import clinical_crumb_workflow

    data_path = os.path.join(os.path.expanduser(base_dir),
                             '{year}', '{subject_id}', '{session_id}', '{image}')
    data_crumb = Crumb(data_path, ignore_list=['.*'])

    atlas_file = HAMM_MNI

    wf = clinical_crumb_workflow(
        wf_name=wf_name,
        data_crumb=data_crumb,
        cache_dir=os.path.abspath(os.path.expanduser(cache_dir)) if cache_dir else '',
        output_dir=os.path.abspath(os.path.expanduser(output_dir)) if output_dir else '',
        config_file=settings_file,
        params={'atlas_file': atlas_file},
    )

    if n_cpus > 1:
        run_wf(wf, plugin=plugin, n_cpus=n_cpus)
    else:
        run_wf(wf, plugin=None)
def __init__(self, crumb, templates, **kwargs):
    """Create an instance with specific input fields.

    Parameters
    ----------
    crumb: hansel.Crumb
        If you are using a relative crumb path use a first argument as
        base directory. This argument will be exposed as an input.
        Example: {base_dir}/data/raw/{subj_id}...

    templates : dict[str] -> list of 2-tuples
        Mapping from string keys to list of crumb arguments in crumb_path
        that must be replaced to complete the file crumb path.
        The keys become output fields on the interface.
        At runtime, the values of the interface inputs will be
        plugged into these templates, and the resulting strings will be
        used to select files.

    Raises
    ------
    ValueError
        If `crumb` is not a valid crumb.
    """
    super(DataCrumb, self).__init__(**kwargs)

    if not Crumb.is_valid(crumb):
        raise ValueError('Crumb {} is not valid.'.format(crumb))
    self._crumb = crumb

    # Infer the infields and outfields from the template: the arguments
    # not covered by the templates have to be given as inputs
    template_args = get_values_map_keys(templates)
    self._infields = [arg for arg in list(crumb.all_args())
                      if arg not in template_args]

    if templates:
        self._outfields = list(templates)
        self._templates = templates
    else:
        self._outfields = []
        self._templates = []

    # Add the dynamic input fields, all of them undefined at the start
    for field in self._infields:
        self.inputs.add_trait(field, traits.Any)

    undefined_traits = {field: Undefined for field in self._infields}
    self.inputs.trait_set(trait_change_notify=False, **undefined_traits)
def cobre_pype(ctx, wf_name="spm_anat_rest_preproc", base_dir="",
               cache_dir="", output_dir="", settings_file="",
               plugin=None, n_cpus=4):
    """ Run the workflow built by `cobre_crumb_workflow`.

    The data is looked up in
    `<base_dir>/{subject_id}/session_1/{modality}/{image}`.

    Parameters
    ----------
    wf_name: str

    base_dir: str
        Base path to where the data is

    cache_dir: str

    output_dir: str

    settings_file: str
        Passed to the workflow as `config_file`.

    plugin: str

    n_cpus: int
    """
    from neuro_pypes.datasets import cobre_crumb_workflow

    def _absolute_or_empty(folder):
        # an empty value is passed on as an empty string
        return os.path.abspath(os.path.expanduser(folder)) if folder else ''

    data_crumb = Crumb(
        os.path.join(os.path.expanduser(base_dir),
                     '{subject_id}', 'session_1', '{modality}', '{image}'),
        ignore_list=['.*'],
    )

    workflow = cobre_crumb_workflow(
        wf_name=wf_name,
        data_crumb=data_crumb,
        cache_dir=_absolute_or_empty(cache_dir),
        output_dir=_absolute_or_empty(output_dir),
        config_file=settings_file,
        params={'atlas_file': HAMM_MNI},
    )

    run_wf(workflow, plugin=plugin, n_cpus=n_cpus)
def test_equal_copy(crumb):
    """A copy equals its source until its values, path or ignore list change."""
    crumb2 = Crumb.copy(crumb)
    assert crumb2 == crumb

    # a different argument value breaks the equality
    crumb2._argval['hansel'] = 'hello'
    assert crumb2 != crumb

    # so does a different path
    crumb2._path += '/'
    assert crumb2 != crumb

    # assign the new path: a bare `==` here compared and discarded the result
    crumb2._path = op.join(crumb._path, '{test}')
    assert crumb2 != crumb

    crumb2._argval['hansel'] = 'hello'
    assert crumb2 != crumb

    # and so does a different ignore list
    crumb3 = Crumb(crumb.path, ignore_list=['.*'])
    assert crumb3 != crumb
def __init__(self, crumb, templates, **kwargs):
    """Create an instance with specific input fields.

    Parameters
    ----------
    crumb: hansel.Crumb
        If you are using a relative crumb path use a first argument as
        base directory. This argument will be exposed as an input.
        Example: {base_dir}/data/raw/{subj_id}...

    templates : dict[str] -> list of 2-tuples
        Mapping from string keys to list of crumb arguments in crumb_path
        that must be replaced to complete the file crumb path.
        The keys become output fields on the interface.
        At runtime, the values of the interface inputs will be
        plugged into these templates, and the resulting strings will be
        used to select files.

    Raises
    ------
    ValueError
        If `crumb` is not a valid crumb.
    """
    super(DataCrumb, self).__init__(**kwargs)

    if not Crumb.is_valid(crumb):
        raise ValueError('Crumb {} is not valid.'.format(crumb))
    self._crumb = crumb

    # Infer the infields and outfields from the template: the arguments
    # not covered by the templates have to be given as inputs
    template_args = get_values_map_keys(templates)
    self._infields = [arg for arg in list(crumb.all_args())
                      if arg not in template_args]

    if templates:
        self._outfields = list(templates)
        self._templates = templates
    else:
        self._outfields = []
        self._templates = []

    # Add the dynamic input fields, all of them undefined at the start
    for field in self._infields:
        self.inputs.add_trait(field, traits.Any)

    undefined_traits = {field: Undefined for field in self._infields}
    self.inputs.trait_set(trait_change_notify=False, **undefined_traits)
def joint_value_map(crumb: hansel.Crumb,
                    arg_names: Iterator[str],
                    check_exists: bool = True) -> CrumbArgsMap:
    """Return a list of tuples of crumb argument values of the given
    `arg_names`.

    Parameters
    ----------
    crumb: hansel.Crumb

    arg_names: List[str]
        Any iterable of names is accepted; it is consumed once.

    check_exists: bool
        If True will return only a values_map with sets of crumb arguments
        that fill a crumb to an existing path.
        Otherwise it won't check if they exist and return all possible
        combinations.
        It has no effect if only one argument name is given.

    Returns
    -------
    values_map: list of lists of 2-tuples
        I call values_map what is called `record` in pandas.
        It is a list of lists of 2-tuples, where each 2-tuple
        has the shape (arg_name, arg_value).
        With more than one argument name the result is sorted.
    """
    # materialise: the names are iterated and counted below
    arg_names = list(arg_names)

    values_map = [
        [(arg_name, arg_value) for arg_value in crumb[arg_name]]
        for arg_name in arg_names
    ]

    if len(arg_names) == 1:
        return [(i, ) for i in values_map[0]]

    # every combination that takes one value of each argument
    combinations = set(itertools.product(*values_map))
    if not check_exists:
        return sorted(combinations)

    return sorted(args for args in combinations
                  if crumb.replace(**dict(args)).exists())
def motion(input: hansel.Crumb, extra: pd.DataFrame, out_file: hansel.Crumb):
    """
    Create in `out_file` an Excel spreadsheet with the motion statistics
    that `motion_stats_sheet` collects for the hansel.Crumb `input`.

    If `extra` is given, its rows are merged into the spreadsheet on the
    columns named like the open arguments of `input`.

    Examples: \n
    nitap motion -i "/data/hansel/cobre/{sid}/{session}/rest/artifact_stats/motion_stats.json" -o motion.xls\n
    nitap motion -i "/data/nuk/out/{group}/{sid}/session_0/rest/artifact_stats/motion_stats.json" -o motion.xls\n
    """
    from neuro_pypes.fmri.utils import motion_stats_sheet

    crumb_args = list(input.open_args())
    df = motion_stats_sheet(input, crumb_args)

    # A DataFrame has no unambiguous truth value, so compare against None.
    if extra is not None:
        extra_columns = set(extra.columns.values)
        # Keep the crumb argument order so the merge keys are deterministic.
        matched_args = [arg for arg in crumb_args if arg in extra_columns]

        if not matched_args:
            raise click.UsageError(
                'Found no matches in the spreadsheet file between: '
                '"{}" and "{}".'.format(extra_columns, crumb_args))

        # `merge` returns a new frame, and matches column against column.
        # NOTE(review): assumes `df` has one column per crumb argument - confirm
        # against `motion_stats_sheet`.
        df = df.merge(extra, on=matched_args, how='left')

    df.to_excel(out_file)
    print(
        'Successfully wrote the motions spreadsheet in "{}".'.format(out_file))
def test_regex_replace2(tmp_crumb):
    """A pattern set after construction must select the same values as one
    embedded in the path, and clearing it must restore the full listing."""
    assert not os.path.exists(tmp_crumb.path)

    tree_values = {
        'session_id': list(map('session_{:02}'.format, range(2))),
        'subject_id': list(map('subj_{:03}'.format, range(100))),
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(tree_values)))

    # the pattern is embedded in the path from the start (fnmatch syntax)
    pattern_path = tmp_crumb.path.replace('{subject_id}',
                                          '{subject_id:subj_02*}')
    embedded = Crumb(pattern_path, regex='fnmatch')

    # the pattern is attached to a plain crumb afterwards
    attached = Crumb(tmp_crumb.path, regex='fnmatch')
    attached.set_pattern('subject_id', 'subj_02*')
    assert embedded['subject_id'] == attached['subject_id']

    # without the pattern the crumb lists the same as the fixture crumb
    attached.clear_pattern('subject_id')
    assert tmp_crumb['subject_id'] == attached['subject_id']
def test_intersection():
    """`intersection` must return the argument values shared by two crumb
    trees, and raise KeyError for arguments they do not have in common."""
    template = Crumb("{base_dir}/raw/{subject_id}/{session_id}/{modality}/{image}")

    base_dir1 = tempfile.mkdtemp(prefix="crumbtest1_")
    tmp_crumb1 = template.replace(base_dir=base_dir1)

    base_dir2 = tempfile.mkdtemp(prefix="crumbtest2_")
    tmp_crumb2 = template.replace(base_dir=base_dir2)

    # nothing has been created below the temporary folders yet
    for empty_crumb in (tmp_crumb1, tmp_crumb2):
        assert not op.exists(empty_crumb._path)
    for empty_crumb in (tmp_crumb1, tmp_crumb2):
        assert not empty_crumb.has_files()

    images = ["mprage{}.nii".format(idx) for idx in (1, 2, 3)]

    def tree_values(n_sessions, n_subjects):
        # One values dict per tree, each with its own list objects.
        return {
            "session_id": list(map("session_{:02}".format, range(n_sessions))),
            "subject_id": list(map("subj_{:03}".format, range(n_subjects))),
            "modality": ["anat"],
            "image": list(images),
        }

    # the first tree is a small subset of the second one
    mktree(tmp_crumb1, list(ParameterGrid(tree_values(2, 3))))
    mktree(tmp_crumb2, list(ParameterGrid(tree_values(20, 30))))

    assert op.exists(tmp_crumb1.split()[0])
    assert op.exists(tmp_crumb2.split()[0])

    by_subject = intersection(tmp_crumb1, tmp_crumb2, on=["subject_id"])
    assert by_subject == [
        (("subject_id", subject),) for subject in tmp_crumb1["subject_id"]
    ]

    by_subject_modality = intersection(tmp_crumb1, tmp_crumb2,
                                       on=["subject_id", "modality"])
    assert by_subject_modality == [
        (("subject_id", "subj_{:03}".format(idx)), ("modality", "anat"))
        for idx in range(3)
    ]

    # a subject that exists in neither tree
    han_crumb = tmp_crumb2.replace(subject_id="hansel")
    assert intersection(tmp_crumb1, han_crumb, on=["subject_id"]) == []

    # a single subject that exists in both trees
    s0_crumb = tmp_crumb2.replace(subject_id="subj_000")
    subj_000 = ("subject_id", "subj_000")

    assert intersection(tmp_crumb1, s0_crumb, on=["subject_id"]) == [(subj_000,)]

    assert intersection(tmp_crumb1, s0_crumb, on=["subject_id", "modality"]) == [
        (subj_000, ("modality", "anat"))
    ]

    assert intersection(tmp_crumb1, s0_crumb, on=["subject_id", "image"]) == [
        (subj_000, ("image", image)) for image in images
    ]

    # test raises
    for unknown_on in (["hansel"], ["subject_id", "modality", "hansel"]):
        with pytest.raises(KeyError):
            intersection(tmp_crumb1, tmp_crumb2, on=unknown_on)

    for call_kwargs in ({}, {"on": ["files"]}):
        # built outside the `raises` context so only `intersection` is checked
        files_crumb = Crumb(op.expanduser("~/{files}"))
        with pytest.raises(KeyError):
            intersection(tmp_crumb1, files_crumb, **call_kwargs)
def dcm2nii(ctx, input_crumb_path, output_dir, regex='fnmatch', ncpus=3):
    """ Convert all DICOM files within `input_crumb_path` into NifTI in
    `output_dir`.

    Each acquisition folder found in the input tree is passed to
    `boyle.dicom.convert.convert_dcm2nii` together with the output folder
    and the output file name derived from the acquisition name.
    NOTE(review): the original notes said that only the reoriented NifTI
    files are copied and that recognized modalities are renamed after
    config.ACQ_PATTERNS; that happens outside this function, if at all -
    confirm in `convert_dcm2nii`.

    Parameters
    ----------
    ctx:
        Task context, not used by this function.

    input_crumb_path: str
        A crumb path str indicating the whole path until the DICOM files.
        Example: '/home/hansel/data/{group}/{subj_id}/{session_id}/{acquisition}/{dcm_file}'

        The crumb argument just before the last one will be used as folder
        container reference for the DICOM series.

    output_dir: str
        The root folder path where to save the tree of nifti files.
        Example: '/home/hansel/nifti'
        This function will create the same tree as the crumbs in
        input_crumb_path, hence for the example above the output would
        have the following structure:
        '/home/hansel/nifti/{group}/{subj_id}/{session_id}/{nifti_file}'

        Where {nifti_file} is the {acquisition} value plus '.nii.gz'.

    regex: str
        The regular expression syntax you may want to set in the Crumbs.
        See hansel.Crumb documentation for this.

    ncpus: int
        The number of processes that will be launched for dcm2nii in
        parallel. With a value of 1 or less the conversions run sequentially
        in this process.

    Raises
    ------
    ValueError
        If the input path has fewer than two crumb arguments.
    """
    from boyle.dicom.convert import convert_dcm2nii

    input_dir = os.path.expanduser(input_crumb_path)
    output_dir = os.path.expanduser(output_dir)

    if not os.path.exists(output_dir):
        log.info('Creating output folder {}.'.format(output_dir))
        os.makedirs(output_dir)
    else:
        log.info('Output folder {} already exists, this will overwrite/merge '
                 'whatever is inside.'.format(output_dir))

    input_dir = Crumb(input_dir, regex=regex, ignore_list=['.*'])

    if not input_dir.has_crumbs():
        raise ValueError(
            'I am almost sure that this cannot work if you do not '
            'use crumb arguments in the input path, got {}.'.format(input_dir))

    # The acquisition folder is the argument just before the last one, so
    # the path needs at least two arguments.
    arg_names = tuple(input_dir.all_args())
    if len(arg_names) < 2:
        raise ValueError(
            'Expected at least two crumb arguments in the input path (the '
            'acquisition folder and the DICOM files), got {}.'.format(
                input_dir))
    acq_folder_arg = arg_names[-2]

    # The output tree mirrors the input one without the DICOM file level.
    out_arg_names = ['{' + arg + '}' for arg in arg_names[:-1]]
    output_dir = Crumb(os.path.join(output_dir, *out_arg_names),
                       regex=regex,
                       ignore_list=['.*'])

    # One (source folder, destination folder, destination file) per acquisition.
    src_dst = []
    acquisitions = input_dir.ls(acq_folder_arg, make_crumbs=True)
    for acq in acquisitions:
        out_args = acq.arg_values.copy()
        acq_out = output_dir.replace(**out_args)

        out_dir = os.path.dirname(acq_out.path)
        out_file = os.path.basename(acq_out.path) + '.nii.gz'
        os.makedirs(out_dir, exist_ok=True)

        src_dst.append((acq.split()[0], out_dir, out_file))

    if ncpus > 1:
        import multiprocessing as mp

        # The context manager shuts the workers down even if a conversion
        # fails; results are collected before leaving it.
        with mp.Pool(processes=ncpus) as pool:
            results = [
                pool.apply_async(convert_dcm2nii, args=(src, dst_dir, dst_file))
                for src, dst_dir, dst_file in src_dst
            ]
            for result in results:
                result.get()
    else:
        for src, dst_dir, dst_file in src_dst:
            convert_dcm2nii(src, dst_dir, dst_file)
def motion_stats_sheet(ctx, motion_file_cr, crumb_fields, out_path):
    r""" Create in `out_path` an Excel spreadsheet with some of the motion
    statistics obtained from the `statistics_files` output of the
    nipype.RapidArt found in the hansel.Crumb `motion_file_cr`.

    Parameters
    ----------
    ctx:
        Task context, not used by this function.

    motion_file_cr: str
        Crumb path to the motion statistics JSON files.

    crumb_fields: str or list of str
        The crumb arguments of `motion_file_cr` to add as columns. It can be
        a list of names or its string form, e.g. "['group', 'session']".
        Brackets and quotes are optional in the string form.

    out_path: str
        Path of the Excel file to write.

    Raises
    ------
    ValueError
        If `motion_file_cr` does not match any file.

    Examples
    --------
    inv motion_stats_sheet \
        --motion-file-cr "/home/hansel/data/out/{group}/{patient_id}/{session}/rest/artifact_stats/motion_stats.json" \
        --crumb-fields "['group', 'patient_id', 'session']" \
        --out-path "/home/hansel/data/motion_stats.xls"
    """
    import json
    from collections import OrderedDict

    from hansel import Crumb

    def get_motion_record(mtn_file_cr, crumb_fields):
        """ Return an OrderedDict of the information found in the
        `mtn_file_cr` and also `crumb_fields` Crumb argument values."""
        with open(str(mtn_file_cr)) as stats_file:
            stats = json.load(stats_file)

        # The stats file holds a list: the second item carries the outlier
        # entries and the fourth one the 'motion_norm' mapping.
        outliers = stats[1]
        motion_norm = stats[3]['motion_norm']

        mtn_record = OrderedDict()
        for fn in crumb_fields:
            mtn_record[fn] = mtn_file_cr[fn][0]

        mtn_record.update(outliers)

        for key, value in motion_norm.items():
            mtn_record['{}_motion_norm'.format(key)] = value

        return mtn_record

    # process the input
    motion_file_cr = Crumb(motion_file_cr)

    if isinstance(crumb_fields, str):
        # Tolerate missing brackets and either kind of quote.
        crumb_fields = [
            field.strip().strip('\'"')
            for field in crumb_fields.strip().strip('[]()').split(',')
        ]
    crumb_fields = [field for field in crumb_fields if field]

    # create the motion records
    motionstats = [
        get_motion_record(stats_file, crumb_fields)
        for stats_file in motion_file_cr.ls()
    ]
    if not motionstats:
        raise ValueError(
            'Could not find any motion statistics file in {}.'.format(
                motion_file_cr))

    # create a pandas Dataframe out of it
    df = pd.DataFrame.from_records(motionstats,
                                   columns=list(motionstats[0]))

    # save it into an excel file
    df.to_excel(out_path)
def test_regex(tmp_crumb):
    """Patterns in crumb arguments must filter the listed values with both
    the 're' and the 'fnmatch' syntax, and survive listing and copying."""
    assert not op.exists(tmp_crumb.path)

    tree_values = {
        'session_id': list(map('session_{:02}'.format, range(2))),
        'subject_id': list(map('subj_{:03}'.format, range(100))),
        'modality': ['anat'],
        'image': ['mprage1.nii'],
    }
    mktree(tmp_crumb, list(ParameterGrid(tree_values)))

    # regular expression syntax (re.match)
    re_path = tmp_crumb.path.replace('{subject_id}',
                                     '{subject_id:^subj_02.*$}')
    crumb = Crumb(re_path, regex='re')

    re_subj_ids = crumb['subject_id']
    expected_ids = list(map('subj_{:03}'.format, range(20, 30)))
    assert re_subj_ids == expected_ids
    assert crumb.ls('subject_id:^subj_02.*$') == crumb.ls('subject_id')

    # shell-style syntax (fnmatch)
    fn_path = tmp_crumb.path.replace('{subject_id}', '{subject_id:subj_02*}')
    crumb = Crumb(fn_path, regex='fnmatch')

    fn_subj_ids = crumb['subject_id']
    assert fn_subj_ids == re_subj_ids

    # listing with an explicit pattern must leave the stored pattern untouched
    backup = crumb.copy()
    assert crumb.ls('subject_id:subj_02*') == crumb.ls('subject_id')
    assert crumb['subject_id'][0] == crumb.get_first('subject_id')
    assert crumb.patterns['subject_id'] == backup.patterns['subject_id']

    assert not crumb.ls('subject_id:subj_03*') == crumb.ls('subject_id')
    assert crumb.patterns['subject_id'] == backup.patterns['subject_id']

    # an unknown regex syntax is rejected
    bad_regex_path = tmp_crumb.path.replace('{subject_id}',
                                            '{subject_id:subj_02*}')
    with pytest.raises(ValueError):
        Crumb(bad_regex_path, regex='hansel')

    # a copy carries over the regex configuration and the patterns
    duplicate = Crumb.copy(crumb)
    assert duplicate._re_method == crumb._re_method
    assert duplicate._re_args == crumb._re_args
    assert duplicate.patterns == crumb.patterns

    assert len(duplicate.patterns) == 1
    assert 'subject_id' in duplicate.patterns.keys()