def test_pandas_fill_crumbs(tmp_tree):
    tmp_crumb = tmp_tree[0]

    recs = tmp_crumb.values_map('image')
    df = pd.DataFrame.from_dict(valuesmap_to_dict(recs))

    df_crumbs = list(pandas_fill_crumbs(df, tmp_crumb))
    uf_crumbs = tmp_crumb.unfold()

    assert df_crumbs == uf_crumbs

def test_values_map_from_df(tmp_tree):
    tmp_crumb = tmp_tree[0]

    recs = tmp_crumb.values_map('image')
    adict = valuesmap_to_dict(recs)

    df1 = pd.DataFrame.from_records([dict(rec) for rec in recs])
    df2 = pd.DataFrame.from_dict(adict)

    assert all(df1 == df2)

    assert all(pd.DataFrame.from_dict([dict(rec) for rec in recs]) ==
               pd.DataFrame.from_records([dict(rec) for rec in recs]))

def test_valuesmap_to_dict(tmp_tree):
    tmp_crumb = tmp_tree[0]
    values_dict = tmp_tree[1]

    recs = tmp_crumb.values_map("image")
    n_recs = len(recs)

    dicts = valuesmap_to_dict(recs)
    for arg_name in dicts:
        assert len(dicts[arg_name]) == n_recs

    assert values_dict == {arg_name: rm_dups(arg_values)
                           for arg_name, arg_values in dicts.items()}

    key_subset = ["subject_id", "session_id"]
    dicts2 = append_dict_values([dict(rec) for rec in recs], keys=key_subset)
    for key in key_subset:
        assert dicts2[key] == dicts[key]

    assert tmp_crumb.values_map("image") == tmp_crumb.values_map()

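
# A minimal sketch (not part of the test suite) of the round trip the tests
# above exercise: Crumb.values_map() records -> valuesmap_to_dict() -> a pandas
# DataFrame -> pandas_fill_crumbs() back to the unfolded crumbs. The crumb path
# below is hypothetical, and it assumes `Crumb` is importable from `hansel`.
def _example_crumb_dataframe_roundtrip():
    crumb = Crumb('/data/{subject_id}/{session_id}/{image}')  # hypothetical layout
    recs = crumb.values_map('image')                      # records of (arg_name, value) tuples
    df = pd.DataFrame.from_dict(valuesmap_to_dict(recs))  # one column per crumb argument
    crumbs = list(pandas_fill_crumbs(df, crumb))          # rebuild one crumb per DataFrame row
    assert crumbs == crumb.unfold()                       # same result as unfolding directly
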
def crumb_wf(work_dir, data_crumb, output_dir, file_templates, wf_name="main_workflow"):
    """Create a workflow with the `subject_session_file` input nodes and an
    empty `datasink`. The `datasink` must be connected afterwards in order to work.

    Parameters
    ----------
    work_dir: str
        Path to the workflow temporary folder.

    data_crumb: hansel.Crumb
        The crumb until the subject files.
        Example: Crumb('/home/hansel/data/{subject_id}/{session_id}/{modality}/{image_file}')

    output_dir: str
        Path to where the datasink will leave the results.

    file_templates: Dict[str -> list of 2-tuple]
        Maps of crumb argument values to specify each file in the `data_crumb`.
        Example: {'anat': [('modality', 'anat'), ('image_file', 'anat_hc.nii.gz')],
                  'pet':  [('modality', 'pet'),  ('image_file', 'pet_fdg.nii.gz')],
                 }

    wf_name: str
        Name of the main workflow.

    Returns
    -------
    wf: Workflow
    """
    # create the root workflow
    wf = pe.Workflow(name=wf_name, base_dir=work_dir)

    # datasink
    datasink = pe.Node(DataSink(parameterization=False,
                                base_directory=output_dir),
                       name="datasink")

    # input node that picks the files from the filesystem following `data_crumb`
    select_files = pe.Node(DataCrumb(crumb=data_crumb,
                                     templates=file_templates,
                                     raise_on_empty=False),
                           name='selectfiles')

    # basic file name substitutions for the datasink
    undef_args = select_files.interface._infields
    substitutions = [(name, "") for name in undef_args]
    substitutions.append(("__", "_"))

    datasink.inputs.substitutions = extend_trait_list(datasink.inputs.substitutions,
                                                      substitutions)

    # Infosource - the information source that iterates over the crumb values map
    # found in the filesystem
    infosource = pe.Node(interface=IdentityInterface(fields=undef_args), name="infosrc")
    infosource.iterables = list(valuesmap_to_dict(joint_value_map(data_crumb, undef_args)).items())
    infosource.synchronize = True

    # join the crumb argument values into the datasink container path
    joinpath = pe.Node(joinstrings(len(undef_args)), name='joinpath')

    # connect the infosrc node to the datasink
    input_joins = [(name, 'arg{}'.format(arg_no + 1))
                   for arg_no, name in enumerate(undef_args)]

    wf.connect([
        (infosource, select_files, [(field, field) for field in undef_args]),
        (select_files, joinpath, input_joins),
        (joinpath, datasink, [("out", "container")]),
    ])

    return wf

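
# A minimal usage sketch of crumb_wf(), reusing the anat/pet layout from the
# docstring example. The work and output folders are hypothetical, and it
# assumes `Crumb` is importable from `hansel`.
def _example_crumb_wf():
    data_crumb = Crumb('/home/hansel/data/{subject_id}/{session_id}/{modality}/{image_file}')
    file_templates = {
        'anat': [('modality', 'anat'), ('image_file', 'anat_hc.nii.gz')],
        'pet':  [('modality', 'pet'),  ('image_file', 'pet_fdg.nii.gz')],
    }
    wf = crumb_wf(work_dir='/tmp/crumb_wf_work',    # hypothetical scratch folder
                  data_crumb=data_crumb,
                  output_dir='/tmp/crumb_wf_out',   # hypothetical results folder
                  file_templates=file_templates)
    # Connect the 'anat'/'pet' outputs of the 'selectfiles' node to processing
    # nodes and route their results into the 'datasink' node before running,
    # e.g. wf.run().
    return wf
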