Example #1
0
def mktree(crumb: hansel.Crumb, values_map: CrumbArgsSequences) -> List[str]:
    """ Create the tree of folders given the values for the crumb arguments
    of the current crumb path.

    Every item of `values_map` is validated first; `touch()` is only called
    once all the items have passed validation.

    Parameters
    ----------
    crumb: Crumb
        The crumb whose arguments are replaced by each item of `values_map`.

    values_map: Sequence[Sequence[2-Tuple[str, str]]] or Sequence[Dict[str, str]]
        The lists of values to substitute each crumb argument that you want.
        Do not leave dependent arguments alone.
        Example: [[('subject_id', 'pat_0001'), ('session_id', 'session_1')],
                  [('subject_id', 'pat_0002'), ('session_id', 'session_1')],
                  ....
                 ]

        Example: [{'subject_id': 'pat_0001', 'session_id': 'session_1'},
                  {'subject_id': 'pat_0002', 'session_id': 'session_1'},
                  ....
                 ]

        If None, `crumb` itself is touched.

    Returns
    -------
    paths: list
        If `values_map` is None, a one-item list holding the result of
        `crumb.touch()`. Otherwise the objects returned by `crumb.replace`,
        one per item of `values_map`, after `touch()` was called on each.

    Raises
    ------
    TypeError
        If `values_map` is not a list, tuple or dict.

    ValueError
        If an item uses keys that are not arguments of `crumb`.

    KeyError
        If an item leaves dependent crumb arguments unset.
    """
    if values_map is None:
        return [crumb.touch()]

    # NOTE(review): a bare dict passes this check (as it always did), but
    # iterating it yields its keys; presumably a list of dicts is what callers
    # are expected to pass -- confirm before tightening.
    if not isinstance(values_map, (list, tuple, dict)):
        raise TypeError(
            "Expected keys in `values_map` to be a Sequence, got {}.".format(
                type(values_map)))

    # The crumb is not modified in the loop, so its argument names are
    # collected once instead of once per item.
    crumb_args = list(crumb.all_args())
    crumb_args_set = set(crumb_args)

    paths = []
    for idx, aval in enumerate(values_map):
        if not isinstance(aval, Mapping):
            aval = dict(aval)

        item_keys = list(aval.keys())
        if not crumb_args_set.issuperset(item_keys):
            # Format plain lists so the message shows the actual names.
            raise ValueError(
                "Expected keys in `values_map` item to be a subset of {}, got {}."
                .format(crumb_args, item_keys))

        rem_deps = crumb._args_open_parents(item_keys)
        if rem_deps:
            raise KeyError(
                "Expected `values_map` item to not leave crumbs alone,"
                " you forgot to add: {} in item {}".format(rem_deps, idx))

        paths.append(crumb.replace(**aval))

    # Only touch once every item has been validated.
    for path in paths:
        path.touch()

    return paths
Example #2
0
def pandas_fill_crumbs(
        df: 'pandas.DataFrame',
        crumb: hansel.Crumb,
        names_map: CrumbArgsSequences = None) -> Iterator[hansel.Crumb]:
    """ Yield copies of `crumb` whose arguments are replaced with the values
    extracted from `df`.

    The columns of `df` are first renamed through `_pandas_rename_cols` using
    `names_map`, then `df_to_valuesmap` builds the argument values that are
    given to `crumb.replace`, one call per resulting entry.

    Parameters
    ----------
    df: pandas.DataFrame
        The table that holds the values for the crumb arguments.
        You may need to rename its columns before using this.

    crumb: hansel.Crumb
        The crumb to be filled.

    names_map: sequence of sequences of 2-tuple or dict[str] -> str
        A "DataFrame column name" to "crumb argument name" relation.
        Example: {'Subject ID': 'subject_id'}
        Anything that is not a dict is converted with `dict(names_map)`.
        If None, every open argument of `crumb` is mapped to itself, e.g.,
        {'subject_id': 'subject_id'}.

        The values of this mapping are the argument names handed to
        `df_to_valuesmap` as `arg_names`.

    Returns
    -------
    crumbs: generator of crumbs
        The results of `crumb.replace` for each entry of the values map.
    """
    if names_map is None:
        column_to_arg = {name: name for name in crumb.open_args()}
    elif isinstance(names_map, dict):
        column_to_arg = names_map
    else:
        column_to_arg = dict(names_map)

    renamed_df = df.pipe(_pandas_rename_cols, column_to_arg)
    crumb_arg_names = list(crumb.all_args())
    mapped_arg_names = list(column_to_arg.values())
    values_map = renamed_df.pipe(df_to_valuesmap,
                                 crumb_arg_names,
                                 arg_names=mapped_arg_names)

    for argvals in values_map:
        yield crumb.replace(**dict(argvals))
Example #3
0
def dcm2nii(ctx, input_crumb_path, output_dir, regex='fnmatch', ncpus=3):
    """ Convert all DICOM files within `input_crumb_path` into NifTI in `output_dir`.

    For every folder matched by the second-to-last crumb argument of
    `input_crumb_path`, `convert_dcm2nii` (from `boyle.dicom.convert`) is
    called with the source folder, the output folder and the output file name.

    NOTE(review): earlier documentation said that only the NifTI files
    reoriented by MRICron's dcm2nii are copied and that files are renamed
    after config.ACQ_PATTERNS; nothing in this function does that, so it is
    presumably handled by `convert_dcm2nii` -- confirm.

    Parameters
    ----------
    ctx:
        Not used by this function.

    input_crumb_path: str
        A crumb path str indicating the whole path until the DICOM files.
        Example: '/home/hansel/data/{group}/{subj_id}/{session_id}/{acquisition}/{dcm_file}'

        The crumb argument just before the last one will be used as folder container reference
        for the DICOM series. At least two crumb arguments are required.

    output_dir: str
        The root folder path where to save the tree of nifti files.
        Example: '/home/hansel/nifti'
        It is created if it does not exist.
        This function will create the same tree as the crumbs in input_crumb_path
        except for the last one, hence for the example above the output would
        have the following structure:
        '/home/hansel/nifti/{group}/{subj_id}/{session_id}/{nifti_file}'

        Where {nifti_file} is the value of {acquisition} followed by '.nii.gz'.

    regex: str
        The regular expression syntax you may want to set in the Crumbs.
        See hansel.Crumb documentation for this.

    ncpus: int
        The number of processes that will be launched for dcm2nii in parallel.
        With a value of 1 or less the conversions run sequentially in this
        process.

    Raises
    ------
    ValueError
        If `input_crumb_path` has no crumb arguments, or fewer than two.
    """
    from boyle.dicom.convert import convert_dcm2nii

    input_dir = os.path.expanduser(input_crumb_path)
    output_dir = os.path.expanduser(output_dir)

    if not os.path.exists(output_dir):
        log.info('Creating output folder {}.'.format(output_dir))
        os.makedirs(output_dir)
    else:
        log.info('Output folder {} already exists, this will overwrite/merge '
                 'whatever is inside.'.format(output_dir))

    input_dir = Crumb(input_dir, regex=regex, ignore_list=['.*'])

    if not input_dir.has_crumbs():
        raise ValueError(
            'I am almost sure that this cannot work if you do not '
            'use crumb arguments in the input path, got {}.'.format(input_dir))

    arg_names = tuple(input_dir.all_args())
    if len(arg_names) < 2:
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError(
            'Expected at least two crumb arguments in the input path (the '
            'DICOM series folder and the DICOM file), got {}.'.format(input_dir))

    # The argument before the last one names the folder of each DICOM series.
    acq_folder_arg = arg_names[-2]

    # The output tree mirrors every input argument except the last one.
    out_arg_names = ['{' + arg + '}' for arg in arg_names[:-1]]
    output_dir = Crumb(os.path.join(output_dir, *out_arg_names),
                       regex=regex,
                       ignore_list=['.*'])

    # Collect one (source, output folder, output file name) job per series.
    src_dst = []
    acquisitions = input_dir.ls(acq_folder_arg, make_crumbs=True)
    for acq in acquisitions:
        out_args = acq.arg_values.copy()
        acq_out = output_dir.replace(**out_args)

        out_dir = os.path.dirname(acq_out.path)
        out_file = os.path.basename(acq_out.path) + '.nii.gz'
        os.makedirs(out_dir, exist_ok=True)

        src_dst.append((acq.split()[0], out_dir, out_file))

    if ncpus > 1:
        import multiprocessing as mp

        # The context manager terminates the pool on exit, so every result
        # must be collected inside the `with` block.
        with mp.Pool(processes=ncpus) as pool:
            pending = [
                pool.apply_async(convert_dcm2nii, args=job) for job in src_dst
            ]
            for result in pending:
                # `get` waits for the job and re-raises any worker exception.
                result.get()
    else:
        for src, dst_dir, dst_file in src_dst:
            convert_dcm2nii(src, dst_dir, dst_file)
Example #4
0
def intersection(crumb1: hansel.Crumb,
                 crumb2: hansel.Crumb,
                 on: Iterator[str] = None) -> List[str]:
    """Return an 'inner join' of both given Crumbs, i.e., the sorted list of
    value entries that `joint_value_map` reports for both crumbs on their
    shared arguments.

    Parameters
    ----------
    crumb1: hansel.Crumb

    crumb2: hansel.Crumb

    on: str or list of str
        Crumb argument names common to both input crumbs.
        A single str is wrapped into a one-item list.
        It is given to `_get_matching_items` as `items`; if None, presumably
        all the common argument names of both crumbs are used.

    Returns
    -------
    inner_join: list
        The entries present in the value maps of both crumbs, sorted.

    Raises
    ------
    KeyError:
        If no matching argument names are found between both crumbs.

    Notes
    -----
    Use with care, ideally the argument matches should be in the same order in
    both crumbs.

    Both crumbs must have at least one matching identifier argument and one
    of those must be the one in `on`.
    """
    wanted = [on] if isinstance(on, str) else on

    matching = _get_matching_items(list(crumb1.all_args()),
                                   list(crumb2.all_args()),
                                   items=wanted)
    shared_args = list(matching)

    if len(shared_args) == 0:
        message = (
            "Could not find matching arguments between {} and  {} limited by {}."
        )
        raise KeyError(
            message.format(list(crumb1.all_args()),
                           list(crumb2.all_args()),
                           wanted))

    # Build both value maps before turning any of them into a set.
    first_map = joint_value_map(crumb1, shared_args, check_exists=True)
    second_map = joint_value_map(crumb2, shared_args, check_exists=True)

    first_values = set(first_map)
    second_values = set(second_map)

    return sorted(first_values & second_values)