Example #1
def prepare_flair_intNorm(flair_prep_dir, out_dir, wd_dir, crash_dir, subjects_sessions, flair_acq, n_cpu=-1):
    out_dir.mkdir(exist_ok=True, parents=True)
    export_version(out_dir)

    wf = Workflow(name="prepare_flair_intNorm")
    wf.base_dir = wd_dir
    wf.config.remove_unnecessary_outputs = False
    wf.config["execution"]["crashdump_dir"] = crash_dir
    wf.config["monitoring"]["enabled"] = "true"

    subjects, sessions = list(zip(*subjects_sessions))
    infosource = Node(niu.IdentityInterface(fields=["subject", "session", "flair_acq"]), name="infosource")
    infosource.iterables = [("subject", subjects),
                            ("session", sessions),
                            ]
    infosource.synchronize = True

    def subject_info_fnc(flair_prep_dir, subject, session, flair_acq):
        from pathlib import Path

        sub_ses = f"sub-{subject}_ses-{session}"
        flair_files = list(Path(flair_prep_dir).glob(
            f"sub-{subject}/ses-{session}/anat/{sub_ses}_acq-{flair_acq}_*_FLAIR_biascorr.nii.gz"))
        assert len(flair_files) == 1, f"Expected one file, but found {flair_files}"
        flair_file = flair_files[0]

        brain_masks = list(Path(flair_prep_dir).glob(
            f"sub-{subject}/ses-{session}/anat/{sub_ses}_space-flair{flair_acq}_desc-brainmask.nii.gz"))
        assert len(brain_masks) > 0, f"Expected at least one file, but found {brain_masks}"
        brain_mask = brain_masks[0]

        out_list = [flair_file, brain_mask]
        return [str(o) for o in out_list]  # cast to str; Path objects are not accepted everywhere downstream

    grabber = Node(niu.Function(input_names=["flair_prep_dir", "subject", "session", "flair_acq"],
                                output_names=["flair_file", "brain_mask"],
                                function=subject_info_fnc),
                   name="grabber"
                   )
    grabber.inputs.flair_prep_dir = flair_prep_dir
    grabber.inputs.flair_acq = flair_acq

    wf.connect([(infosource, grabber, [("subject", "subject"),
                                       ("session", "session"),
                                       ]
                 )
                ]
               )

    # adapted from https://gist.github.com/lebedov/94f1caf8a792d80cd91e7b99c1a0c1d7
    # Intensity normalization - subtract minimum, then divide by difference of maximum and minimum:
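    # i.e. normalized = (img - min) / (max - min), with min/max taken within the brain mask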
    img_range = Node(interface=fsl.ImageStats(op_string='-k %s -R'), name='img_range')
    wf.connect(grabber, "flair_file", img_range, "in_file")
    wf.connect(grabber, "brain_mask", img_range, "mask_file")

    def func(in_stat):
        # fsl.ImageStats "-R" returns (min, max) within the mask; build the
        # fsl.ImageMaths op string, e.g. (12.0, 96.0) -> "-sub 12.0 -div 84.0"
        min_val, max_val = in_stat
        return '-sub %s -div %s' % (min_val, (max_val - min_val))

    stat_to_op_string = Node(interface=niu.Function(input_names=['in_stat'],
                                                    output_names=['op_string'],
                                                    function=func),
                             name='stat_to_op_string')
    wf.connect(img_range, "out_stat", stat_to_op_string, "in_stat")

    flair_normalized = Node(interface=fsl.ImageMaths(), name='flair_normalized')
    wf.connect(stat_to_op_string, "op_string", flair_normalized, "op_string")
    wf.connect(grabber, "flair_file", flair_normalized, "in_file")

    base_directory = str(out_dir.parent)
    out_path_base = str(out_dir.name)
    ds_flair_biascorr_intNorm = Node(DerivativesDataSink(base_directory=base_directory, out_path_base=out_path_base),
                                     name="ds_flair_biascorr_intNorm")
    ds_flair_biascorr_intNorm.inputs.suffix = "FLAIR_biascorrIntNorm"
    wf.connect(flair_normalized, "out_file", ds_flair_biascorr_intNorm, "in_file")
    wf.connect(grabber, "flair_file", ds_flair_biascorr_intNorm, "source_file")

    wf.run(plugin='MultiProc', plugin_args={'n_procs': n_cpu})
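
A minimal invocation sketch (all paths, subject/session labels, and the acquisition tag are hypothetical; assumes the imports used by the function are in scope):

from pathlib import Path

prepare_flair_intNorm(
    flair_prep_dir=Path("/data/derivatives/flair_prep"),   # hypothetical
    out_dir=Path("/data/derivatives/flair_intNorm"),
    wd_dir=Path("/scratch/wd"),
    crash_dir=Path("/scratch/crash"),
    subjects_sessions=[("01", "tp1"), ("02", "tp1")],
    flair_acq="tra",  # must match the acq- entity of the prepared FLAIR files
    n_cpu=4,
)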
Example #2
def post_locate_masking(locate_dir, wd_dir, crash_dir, out_dir, subjects_sessions, n_cpu=1):
    out_dir.mkdir(exist_ok=True, parents=True)

    wf = Workflow(name="post_locate_masking")
    wf.base_dir = wd_dir
    wf.config.remove_unnecessary_outputs = False
    wf.config["execution"]["crashdump_dir"] = crash_dir
    wf.config["monitoring"]["enabled"] = "true"

    base_directory = str(out_dir.parent)
    out_path_base = str(out_dir.name)

    subjects, sessions = list(zip(*subjects_sessions))
    infosource = Node(niu.IdentityInterface(fields=["subject", "session"]), name="infosource")
    infosource.iterables = [("subject", subjects),
                            ("session", sessions),
                            ]
    infosource.synchronize = True

    def subject_info_fnc(locate_dir, subject, session):
        from pathlib import Path
        subses = f"sub-{subject}ses-{session}"

        # bianca mask
        search_pattern = f"*/{subses}_biancamask.nii.gz"
        bianca_mask = list(Path(locate_dir).glob(search_pattern))
        if len(bianca_mask) != 1:
            raise Exception(f"Expected one file, but {len(bianca_mask)} found. {search_pattern}")
        bianca_mask = bianca_mask[0]

        # locate output
        search_pattern = f"*/*_results_directory/{subses}_BIANCA_LOCATE_binarylesionmap.nii.gz"
        locate_mask = list(Path(locate_dir).glob(search_pattern))
        if len(locate_mask) != 1:
            raise Exception(f"Expected one file, but {len(locate_mask)} found. {search_pattern}")
        locate_mask = locate_mask[0]

        generic_bids_file = f"sub-{subject}/ses-{session}/anat/sub-{subject}_ses-{session}_FLAIR.nii.gz"
        out_list = [bianca_mask, locate_mask, generic_bids_file]
        return [str(o) for o in out_list]  # cast to str; Path objects are not accepted everywhere downstream

    grabber = Node(niu.Function(input_names=["locate_dir", "subject", "session"],
                                output_names=["bianca_mask", "locate_mask", "generic_bids_file"],
                                function=subject_info_fnc),
                   name="grabber"
                   )
    grabber.inputs.locate_dir = locate_dir

    wf.connect([(infosource, grabber, [("subject", "subject"),
                                       ("session", "session"),
                                       ]
                 )
                ]
               )

    locate_output_masked = Node(fsl.ApplyMask(), name="locate_output_masked")
    wf.connect(grabber, "locate_mask", locate_output_masked, "in_file")
    wf.connect(grabber, "bianca_mask", locate_output_masked, "mask_file")

    ds = Node(DerivativesDataSink(base_directory=base_directory, out_path_base=out_path_base), name="ds")
    ds.inputs.suffix = "locateBinaryLesionMap"
    ds.inputs.desc = "biancaMasked"
    wf.connect(locate_output_masked, "out_file", ds, "in_file")
    wf.connect(grabber, "generic_bids_file", ds, "source_file")

    wf.run(plugin='MultiProc', plugin_args={'n_procs': n_cpu})
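
A minimal invocation sketch for the LOCATE post-processing step (hypothetical paths and subject list):

from pathlib import Path

post_locate_masking(
    locate_dir=Path("/data/derivatives/locate"),        # hypothetical
    wd_dir=Path("/scratch/wd"),
    crash_dir=Path("/scratch/crash"),
    out_dir=Path("/data/derivatives/locate_masked"),
    subjects_sessions=[("01", "tp1")],
    n_cpu=2,
)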
Example #3
def prepare_bianca_data(bids_dir,
                        template_prep_dir,
                        t1w_prep_dir,
                        out_dir,
                        wd_dir,
                        crash_dir,
                        subjects_sessions,
                        flair_acq,
                        n_cpu=-1,
                        omp_nthreads=1,
                        run_wf=True,
                        graph=False):
    out_dir.mkdir(exist_ok=True, parents=True)
    export_version(out_dir)

    wf = Workflow(name="meta_prepare")
    wf.base_dir = wd_dir
    wf.config.remove_unnecessary_outputs = False
    wf.config["execution"]["crashdump_dir"] = crash_dir
    wf.config["monitoring"]["enabled"] = "true"

    subjects, sessions = list(zip(*subjects_sessions))
    infosource = Node(
        niu.IdentityInterface(fields=["subject", "session", "flair_acq"]),
        name="infosource")
    infosource.iterables = [
        ("subject", subjects),
        ("session", sessions),
    ]
    infosource.synchronize = True

    def subject_info_fnc(bids_dir, template_prep_dir, t1w_prep_dir, subject,
                         session, flair_acq):
        from pathlib import Path
        from warnings import warn

        sub_ses = f"sub-{subject}_ses-{session}"
        sub = f"sub-{subject}"

        flair_files = list(
            Path(bids_dir).glob(
                f"sub-{subject}/ses-{session}/anat/{sub_ses}_acq-{flair_acq}_*_FLAIR.nii.gz"
            ))
        assert len(flair_files) > 0, f"Expected at least one file, but found {flair_files}"
        if len(flair_files) > 1:
            warn(f"{len(flair_files)} FLAIR files found. Taking first")
        flair_file = flair_files[0]

        generic_bids_file = Path(bids_dir) / f"sub-{subject}/ses-{session}/anat/{sub_ses}_T1w.nii.gz"
        flair_space = f"flair{flair_acq}"

        t1w_sub = Path(t1w_prep_dir) / f"sub-{subject}/ses-{session}/anat"
        t1w = t1w_sub / f"{sub_ses}_space-tpl_T1w.nii.gz"
        t1w_brain = t1w_sub / f"{sub_ses}_space-tpl_desc-brain_T1w.nii.gz"

        template_sub = Path(template_prep_dir) / f"sub-{subject}/anat/"
        t1w_brainmask = template_sub / f"{sub}_desc-brain_mask.nii.gz"
        t1w_to_MNI_xfm = template_sub / f"{sub}_from-tpl_to-MNI_xfm.mat"
        vent_mask = template_sub / f"{sub}_desc-bianca_ventmask.nii.gz"
        wm_mask = template_sub / f"{sub}_desc-bianca_wmmask.nii.gz"
        distancemap = template_sub / f"{sub}_desc-bianca_ventdistmap.nii.gz"
        perivent_mask = template_sub / f"{sub}_desc-periventmask.nii.gz"
        deepWM_mask = template_sub / f"{sub}_desc-deepWMmask.nii.gz"

        out_list = [
            flair_file, generic_bids_file, flair_space, t1w, t1w_brain,
            t1w_brainmask, t1w_to_MNI_xfm, vent_mask, wm_mask, distancemap,
            perivent_mask, deepWM_mask
        ]
        for f in [
                flair_file, t1w, t1w_brain, t1w_brainmask, t1w_to_MNI_xfm,
                vent_mask, wm_mask, distancemap
        ]:
            if not f.is_file():
                raise FileNotFoundError(f)
        return [str(o) for o in out_list]  # cast to str; Path objects are not accepted everywhere downstream

    grabber = Node(niu.Function(input_names=[
        "bids_dir", "template_prep_dir", "t1w_prep_dir", "subject", "session",
        "flair_acq"
    ],
                                output_names=[
                                    "flair_file", "generic_bids_file",
                                    "flair_space", "t1w", "t1w_brain",
                                    "t1w_brainmask", "t1w_to_MNI_xfm",
                                    "vent_mask", "wm_mask", "distancemap",
                                    "perivent_mask", "deepWM_mask"
                                ],
                                function=subject_info_fnc),
                   name="grabber")
    grabber.inputs.bids_dir = bids_dir
    grabber.inputs.t1w_prep_dir = t1w_prep_dir
    grabber.inputs.template_prep_dir = template_prep_dir
    grabber.inputs.flair_acq = flair_acq

    wf.connect([(infosource, grabber, [
        ("subject", "subject"),
        ("session", "session"),
    ])])
    prep_flair_wf = get_prep_flair_wf(omp_nthreads=omp_nthreads)
    wf.connect([(grabber, prep_flair_wf, [
        ("flair_file", "inputnode.flair_file"),
        ("t1w", "inputnode.t1w"),
        ("t1w_brain", "inputnode.t1w_brain"),
        ("t1w_brainmask", "inputnode.t1w_brainmask"),
        ("t1w_to_MNI_xfm", "inputnode.t1w_to_MNI_xfm"),
        ("vent_mask", "inputnode.vent_mask"),
        ("wm_mask", "inputnode.wm_mask"),
        ("distancemap", "inputnode.distancemap"),
        ("perivent_mask", "inputnode.perivent_mask"),
        ("deepWM_mask", "inputnode.deepWM_mask"),
    ])])

    ds_wf = get_ds_wf(out_dir)
    wf.connect([(prep_flair_wf, ds_wf, [
        ("outputnode.flair_biascorr", "inputnode.flair_biascorr"),
        ("outputnode.t1w_brain", "inputnode.t1w_brain"),
        ("outputnode.brainmask", "inputnode.brainmask"),
        ("outputnode.wm_mask", "inputnode.wm_mask"),
        ("outputnode.vent_mask", "inputnode.vent_mask"),
        ("outputnode.distancemap", "inputnode.distancemap"),
        ("outputnode.perivent_mask", "inputnode.perivent_mask"),
        ("outputnode.deepWM_mask", "inputnode.deepWM_mask"),
        ("outputnode.t1w_to_flair", "inputnode.t1w_to_flair"),
        ("outputnode.flair_mniSp", "inputnode.flair_mniSp"),
        ("outputnode.flair_to_mni", "inputnode.flair_to_mni"),
    ]),
                (grabber, ds_wf, [
                    ("flair_file", "inputnode.bids_flair_file"),
                    ("flair_space", "inputnode.space"),
                    ("generic_bids_file", "inputnode.generic_bids_file"),
                ])])

    if graph:
        wf.write_graph("workflow_graph.png", graph2use="exec")
        wf.write_graph("workflow_graph_c.png", graph2use="colored")
    if run_wf:
        wf.run(plugin='MultiProc', plugin_args={'n_procs': n_cpu})
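
A minimal invocation sketch (hypothetical directories; flair_acq must match the acq- entity of the FLAIR files in the BIDS dataset):

from pathlib import Path

prepare_bianca_data(
    bids_dir=Path("/data/bids"),                                # hypothetical
    template_prep_dir=Path("/data/derivatives/template_prep"),
    t1w_prep_dir=Path("/data/derivatives/t1w_prep"),
    out_dir=Path("/data/derivatives/bianca_prep"),
    wd_dir=Path("/scratch/wd"),
    crash_dir=Path("/scratch/crash"),
    subjects_sessions=[("01", "tp1")],
    flair_acq="tra",
    n_cpu=4,
    omp_nthreads=2,
    graph=False,
)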
Example #4
def bianca_threshold(bianca_dir,
                     mask_dir,
                     flair_prep_dir,
                     wd_dir,
                     crash_dir,
                     out_dir,
                     subjects_sessions,
                     flair_acq,
                     thresholds,
                     n_cpu=1,
                     run_BiancaOverlapMeasures=True):
    out_dir.mkdir(exist_ok=True, parents=True)

    wf = Workflow(name="bianca_threshold")
    wf.base_dir = wd_dir
    wf.config.remove_unnecessary_outputs = False
    wf.config["execution"]["crashdump_dir"] = crash_dir
    wf.config["monitoring"]["enabled"] = "true"

    def format_t(s):
        # e.g. 0.95 -> "thresh0.95"; used below as the BIDS "desc" entity
        return f"thresh{s}"

    base_directory = str(out_dir.parent)
    out_path_base = str(out_dir.name)

    subjects, sessions = list(zip(*subjects_sessions))
    infosource = Node(niu.IdentityInterface(fields=["subject", "session"]),
                      name="infosource")
    infosource.iterables = [
        ("subject", subjects),
        ("session", sessions),
    ]
    infosource.synchronize = True

    threshsource = Node(niu.IdentityInterface(fields=["threshold"]),
                        name="threshsource")
    threshsource.iterables = [("threshold", thresholds)]

    def subject_info_fnc(bianca_dir, mask_dir, flair_prep_dir, subject,
                         session, flair_acq, run_BiancaOverlapMeasures):
        from pathlib import Path
        sub_ses = f"sub-{subject}_ses-{session}"
        bianca_lpm = list(
            Path(bianca_dir).glob(
                f"sub-{subject}/ses-{session}/anat/{sub_ses}_acq-{flair_acq}_*_FLAIR_LPM.nii.gz"
            ))[0]

        if run_BiancaOverlapMeasures:
            manual_mask = list(
                Path(mask_dir).glob(
                    f"sub-{subject}/ses-{session}/{sub_ses}_acq-{flair_acq}_*_FLAIR_mask_goldstandard_new.nii.gz"
                ))[0]
        else:
            manual_mask = None

        wm_mask = list(
            Path(flair_prep_dir).glob(
                f"sub-{subject}/ses-{session}/anat/{sub_ses}_space-flair{flair_acq}_desc-wmmask.nii.gz"
            ))[0]
        deepwm_mask = list(
            Path(flair_prep_dir).glob(
                f"sub-{subject}/ses-{session}/anat/{sub_ses}_space-flair{flair_acq}_desc-deepWMmask.nii.gz"
            ))[0]
        pervent_mask = list(
            Path(flair_prep_dir).glob(
                f"sub-{subject}/ses-{session}/anat/{sub_ses}_space-flair{flair_acq}_desc-periventmask.nii.gz"
            ))[0]
        out_list = [
            bianca_lpm, manual_mask, wm_mask, deepwm_mask, pervent_mask
        ]
        return [str(o) for o in out_list]  # cast to str; Path objects are not accepted everywhere downstream

    grabber = Node(niu.Function(input_names=[
        "bianca_dir", "mask_dir", "flair_prep_dir", "subject", "session",
        "flair_acq", "run_BiancaOverlapMeasures"
    ],
                                output_names=[
                                    "bianca_lpm", "manual_mask", "wm_mask",
                                    "deepwm_mask", "pervent_mask"
                                ],
                                function=subject_info_fnc),
                   name="grabber")
    grabber.inputs.bianca_dir = bianca_dir
    grabber.inputs.mask_dir = mask_dir
    grabber.inputs.flair_prep_dir = flair_prep_dir
    grabber.inputs.flair_acq = flair_acq
    grabber.inputs.run_BiancaOverlapMeasures = run_BiancaOverlapMeasures

    wf.connect([(infosource, grabber, [
        ("subject", "subject"),
        ("session", "session"),
    ])])
    # threshold lpm
    bianca_lpm_masked = Node(fsl.ApplyMask(), name="bianca_lpm_masked")
    wf.connect(grabber, "bianca_lpm", bianca_lpm_masked, "in_file")
    wf.connect(grabber, "wm_mask", bianca_lpm_masked, "mask_file")

    thresholded_bianca_lpm_mask = Node(fsl.Threshold(),
                                       name="thresholded_bianca_lpm_mask")
    wf.connect(bianca_lpm_masked, "out_file", thresholded_bianca_lpm_mask,
               "in_file")
    wf.connect(threshsource, "threshold", thresholded_bianca_lpm_mask,
               "thresh")
    thresholded_bianca_lpm_mask.inputs.args = "-bin"

    ds_masked = Node(DerivativesDataSink(base_directory=base_directory,
                                         out_path_base=out_path_base),
                     name="ds_masked")
    ds_masked.inputs.desc = "biancamasked"
    wf.connect(bianca_lpm_masked, "out_file", ds_masked, "in_file")
    wf.connect(grabber, "bianca_lpm", ds_masked, "source_file")

    ds_masked_thr_bin = Node(DerivativesDataSink(base_directory=base_directory,
                                                 out_path_base=out_path_base),
                             name="ds_masked_thr_bin")
    ds_masked_thr_bin.inputs.suffix = "biancaLPMmaskedThrBin"
    wf.connect(threshsource, ("threshold", format_t), ds_masked_thr_bin,
               "desc")
    wf.connect(thresholded_bianca_lpm_mask, "out_file", ds_masked_thr_bin,
               "in_file")
    wf.connect(grabber, "bianca_lpm", ds_masked_thr_bin, "source_file")

    def str_to_file_fct(s):
        # write the stats string to a text file so it can be picked up by a datasink
        from pathlib import Path
        out_file = Path.cwd() / "out.txt"
        out_file.write_text(s)
        return str(out_file)

    # volume extraction
    ## total
    cluster_stats_total = Node(BiancaClusterStats(),
                               name="cluster_stats_total")
    cluster_stats_total.inputs.min_cluster_size = 0
    wf.connect(bianca_lpm_masked, "out_file", cluster_stats_total,
               "bianca_output_map")
    wf.connect(threshsource, "threshold", cluster_stats_total, "threshold")
    wf.connect(grabber, "wm_mask", cluster_stats_total, "mask_file")

    str_to_file_total = Node(niu.Function(input_names=["s"],
                                          output_names=["out_file"],
                                          function=str_to_file_fct),
                             name="str_to_file_total")
    wf.connect(cluster_stats_total, "out_stat", str_to_file_total, "s")

    ds_cluster_stats_total = Node(DerivativesDataSink(
        base_directory=base_directory, out_path_base=out_path_base),
                                  name="ds_cluster_stats_total")
    ds_cluster_stats_total.inputs.suffix = "ClusterStatsTotal"
    wf.connect(threshsource, ("threshold", format_t), ds_cluster_stats_total,
               "desc")
    wf.connect(str_to_file_total, "out_file", ds_cluster_stats_total,
               "in_file")
    wf.connect(grabber, "bianca_lpm", ds_cluster_stats_total, "source_file")

    ## deep wm
    cluster_stats_deepwm = Node(BiancaClusterStats(),
                                name="cluster_stats_deepwm")
    cluster_stats_deepwm.inputs.min_cluster_size = 0
    wf.connect(bianca_lpm_masked, "out_file", cluster_stats_deepwm,
               "bianca_output_map")
    wf.connect(threshsource, "threshold", cluster_stats_deepwm, "threshold")
    wf.connect(grabber, "deepwm_mask", cluster_stats_deepwm, "mask_file")

    str_to_file_deepwm = Node(niu.Function(input_names=["s"],
                                           output_names=["out_file"],
                                           function=str_to_file_fct),
                              name="str_to_file_deepwm")
    wf.connect(cluster_stats_deepwm, "out_stat", str_to_file_deepwm, "s")

    ds_cluster_stats_deepwm = Node(DerivativesDataSink(
        base_directory=base_directory, out_path_base=out_path_base),
                                   name="ds_cluster_stats_deepwm")
    ds_cluster_stats_deepwm.inputs.suffix = "ClusterStatsdeepwm"
    wf.connect(threshsource, ("threshold", format_t), ds_cluster_stats_deepwm,
               "desc")
    wf.connect(str_to_file_deepwm, "out_file", ds_cluster_stats_deepwm,
               "in_file")
    wf.connect(grabber, "bianca_lpm", ds_cluster_stats_deepwm, "source_file")

    ## perivent wm
    cluster_stats_perventwm = Node(BiancaClusterStats(),
                                   name="cluster_stats_perventwm")
    cluster_stats_perventwm.inputs.min_cluster_size = 0
    wf.connect(bianca_lpm_masked, "out_file", cluster_stats_perventwm,
               "bianca_output_map")
    wf.connect(threshsource, "threshold", cluster_stats_perventwm, "threshold")
    wf.connect(grabber, "pervent_mask", cluster_stats_perventwm, "mask_file")

    str_to_file_perventwm = Node(niu.Function(input_names=["s"],
                                              output_names=["out_file"],
                                              function=str_to_file_fct),
                                 name="str_to_file_perventwm")
    wf.connect(cluster_stats_perventwm, "out_stat", str_to_file_perventwm, "s")

    ds_cluster_stats_perventwm = Node(DerivativesDataSink(
        base_directory=base_directory, out_path_base=out_path_base),
                                      name="ds_cluster_stats_perventwm")
    ds_cluster_stats_perventwm.inputs.suffix = "ClusterStatsperventwm"
    wf.connect(threshsource, ("threshold", format_t),
               ds_cluster_stats_perventwm, "desc")
    wf.connect(str_to_file_perventwm, "out_file", ds_cluster_stats_perventwm,
               "in_file")
    wf.connect(grabber, "bianca_lpm", ds_cluster_stats_perventwm,
               "source_file")

    if run_BiancaOverlapMeasures:
        overlap = Node(BiancaOverlapMeasures(), name="overlap")
        wf.connect(bianca_lpm_masked, "out_file", overlap, "lesionmask")
        wf.connect(grabber, "manual_mask", overlap, "manualmask")
        wf.connect(threshsource, "threshold", overlap, "threshold")
        overlap.inputs.saveoutput = 1

        ds_overlap = Node(DerivativesDataSink(base_directory=base_directory,
                                              out_path_base=out_path_base),
                          name="ds_overlap")
        ds_overlap.inputs.suffix = "overlap"
        wf.connect(threshsource, ("threshold", format_t), ds_overlap, "desc")
        wf.connect(overlap, "out_file", ds_overlap, "in_file")
        wf.connect(grabber, "bianca_lpm", ds_overlap, "source_file")

    wf.run(plugin='MultiProc', plugin_args={'n_procs': n_cpu})
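
A minimal invocation sketch (hypothetical paths and threshold choices; the thresholds are probability cutoffs applied to the BIANCA lesion probability map):

from pathlib import Path

bianca_threshold(
    bianca_dir=Path("/data/derivatives/bianca"),            # hypothetical
    mask_dir=Path("/data/manual_masks"),
    flair_prep_dir=Path("/data/derivatives/bianca_prep"),
    wd_dir=Path("/scratch/wd"),
    crash_dir=Path("/scratch/crash"),
    out_dir=Path("/data/derivatives/bianca_thresholded"),
    subjects_sessions=[("01", "tp1")],
    flair_acq="tra",
    thresholds=[0.9, 0.95, 0.99],
    n_cpu=2,
    run_BiancaOverlapMeasures=True,
)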
Example #5
def run_bianca_wf(masterfile,
                  out_dir,
                  wd_dir,
                  crash_dir,
                  df,
                  training_subject_idx,
                  query_subject_idx,
                  name="bianca",
                  n_cpu=4,
                  save_classifier=False,
                  trained_classifier_file=None):
    """

    :param masterfile: str
    :param out_dir:
    :param wd_dir:
    :param crash_dir:
    :param df: df
    :param training_subject_idx: training_subject_idx: list of ints, python-style 0-based; training subjects in df
    :param query_subject_idx: list of ints, python-style 0-based; querysubjects in df
    :param name:
    :param n_cpu:
    :param save_classifier: bool
    :param trained_classifier_file: file previously saved with save_classifier; if given, training subjects
    are ignored and classifier file is used in prediction
    :return: None
    """

    if save_classifier and trained_classifier_file:
        raise RuntimeError(
            "save_classifier and trained_classifier_file cannot be set at the same time"
        )
    if trained_classifier_file:
        trained_classifier_file = str(trained_classifier_file)
    #####
    # masterfile information
    expected_header = [
        'flair', 't1w', 'manual_mask', 'mat', 'subject', 'session'
    ]
    assert df.columns.tolist() == expected_header, \
        f"masterfile columns are off. columns should be {expected_header} but are {df.columns}"

    featuresubset = "1,2"
    brainmaskfeaturenum = "2"
    labelfeaturenum = "3"
    matfeaturenum = "4"

    ######
    # workflow
    wf = Workflow(name=name)

    ######
    # subject info
    inputnode = Node(niu.IdentityInterface(fields=['query_subject_idx']),
                     name='inputnode')
    inputnode.iterables = [("query_subject_idx", query_subject_idx)]
    inputnode.synchronize = True

    def get_query_info_fnc(df, query_subject_idx):
        # look up subject, session, and FLAIR file for the query row;
        # BIANCA's querysubjectnum is 1-based, hence the +1
        row = df.iloc[query_subject_idx]
        query_subject_num = query_subject_idx + 1
        return row.subject, row.session, row.flair, query_subject_num

    query_info = Node(niu.Function(input_names=["df", "query_subject_idx"],
                                   output_names=[
                                       'query_subject', 'query_session',
                                       'query_flair', 'query_subject_num'
                                   ],
                                   function=get_query_info_fnc),
                      name="query_info")
    query_info.inputs.df = df
    wf.connect(inputnode, "query_subject_idx", query_info, "query_subject_idx")

    def get_training_info_fnc(df, query_subject_idx, training_subject_idx):
        import numpy as np
        # exclude the query subject from the training set (leave-one-out)
        training_subject_idx_clean = training_subject_idx.tolist()
        if query_subject_idx in training_subject_idx_clean:
            training_subject_idx_clean.remove(query_subject_idx)
        training_subjects = df.iloc[training_subject_idx_clean].subject.tolist()
        training_sessions = df.iloc[training_subject_idx_clean].session.tolist()
        # BIANCA's trainingnums are 1-based row numbers in the masterfile
        training_subject_nums_str = ",".join(
            (np.array(training_subject_idx_clean) + 1).astype(str).tolist())
        return training_subject_idx_clean, training_subject_nums_str, training_subjects, training_sessions

    training_info = Node(niu.Function(
        input_names=["df", "query_subject_idx", "training_subject_idx"],
        output_names=[
            "training_subject_idx", "training_subject_nums_str",
            "training_subjects", "training_sessions"
        ],
        function=get_training_info_fnc),
                         name="training_info")
    training_info.inputs.df = df
    training_info.inputs.training_subject_idx = training_subject_idx
    wf.connect(inputnode, "query_subject_idx", training_info,
               "query_subject_idx")

    bianca = Node(BIANCA(), name="bianca")
    bianca.inputs.masterfile = str(masterfile)
    bianca.inputs.featuresubset = featuresubset
    bianca.inputs.brainmaskfeaturenum = brainmaskfeaturenum
    bianca.inputs.matfeaturenum = matfeaturenum
    bianca.inputs.save_classifier = save_classifier
    wf.connect(query_info, "query_subject_num", bianca, "querysubjectnum")

    if trained_classifier_file:
        bianca.inputs.trained_classifier_file = trained_classifier_file
    else:
        bianca.inputs.labelfeaturenum = labelfeaturenum
        wf.connect(training_info, "training_subject_nums_str", bianca,
                   "trainingnums")

    def classifier_info_fct(masterfile,
                            query_subject,
                            query_session,
                            query_flair,
                            training_subjects=None,
                            training_sessions=None,
                            classifier_file=None):
        d = {
            "masterfile": str(masterfile),
            "query_subject_session": [query_subject, query_session],
            "query_flair": query_flair,
        }
        if training_subjects:
            d["training_subjects_sessions"] = list(
                zip(training_subjects, training_sessions))
        else:
            d["classifier_file"] = classifier_file
        return d

    classifier_info = Node(niu.Function(input_names=[
        "masterfile", "query_subject", "query_session", "query_flair",
        "training_subjects", "training_sessions", "classifier_file"
    ],
                                        output_names=["meta_dict"],
                                        function=classifier_info_fct),
                           name="classifier_info")
    classifier_info.inputs.masterfile = masterfile
    wf.connect(query_info, "query_subject", classifier_info, "query_subject")
    wf.connect(query_info, "query_session", classifier_info, "query_session")
    wf.connect(query_info, "query_flair", classifier_info, "query_flair")
    if trained_classifier_file:
        classifier_info.inputs.classifier_file = trained_classifier_file
    else:
        wf.connect(training_info, "training_subjects", classifier_info,
                   "training_subjects")
        wf.connect(training_info, "training_sessions", classifier_info,
                   "training_sessions")

    ds = Node(DerivativesDataSink(base_directory=str(out_dir.parent),
                                  out_path_base=str(out_dir.name)),
              name="ds")
    ds.inputs.suffix = "LPM"
    wf.connect(bianca, "out_file", ds, "in_file")
    wf.connect(query_info, "query_flair", ds, "source_file")
    wf.connect(classifier_info, "meta_dict", ds, "meta_dict")

    if save_classifier:
        ds_clf = Node(DerivativesDataSink(base_directory=str(out_dir.parent),
                                          out_path_base=str(out_dir.name)),
                      name="ds_clf")
        ds_clf.inputs.suffix = "classifier"
        wf.connect(bianca, "classifier_file", ds_clf, "in_file")
        wf.connect(query_info, "query_flair", ds_clf, "source_file")

        ds_clf_labels = Node(DerivativesDataSink(
            base_directory=str(out_dir.parent),
            out_path_base=str(out_dir.name)),
                             name="ds_clf_labels")
        ds_clf_labels.inputs.suffix = "classifier_labels"
        wf.connect(bianca, "classifier_labels_file", ds_clf_labels, "in_file")
        wf.connect(query_info, "query_flair", ds_clf_labels, "source_file")

    wf.base_dir = wd_dir
    wf.config.remove_unnecessary_outputs = False
    wf.config["execution"]["crashdump_dir"] = crash_dir
    wf.config["monitoring"]["enabled"] = "true"
    # wf.write_graph("workflow_graph.png", graph2use="exec")
    # wf.write_graph("workflow_graph_c.png", graph2use="colored")
    wf.run(plugin='MultiProc', plugin_args={'n_procs': n_cpu})
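
A minimal invocation sketch (hypothetical paths; df mirrors the on-disk masterfile, and training_subject_idx is passed as a numpy array because the training-info helper calls .tolist() on it):

from pathlib import Path
import numpy as np
import pandas as pd

# columns must match expected_header: flair, t1w, manual_mask, mat, subject, session
df = pd.DataFrame(
    [["/data/sub-01_FLAIR.nii.gz", "/data/sub-01_T1w.nii.gz",
      "/data/sub-01_mask.nii.gz", "/data/sub-01_to-MNI.mat", "01", "tp1"],
     ["/data/sub-02_FLAIR.nii.gz", "/data/sub-02_T1w.nii.gz",
      "/data/sub-02_mask.nii.gz", "/data/sub-02_to-MNI.mat", "02", "tp1"]],
    columns=["flair", "t1w", "manual_mask", "mat", "subject", "session"])

run_bianca_wf(
    masterfile=Path("/data/masterfile.txt"),   # hypothetical; must list the same rows as df
    out_dir=Path("/data/derivatives/bianca"),
    wd_dir=Path("/scratch/wd"),
    crash_dir=Path("/scratch/crash"),
    df=df,
    training_subject_idx=np.array([0, 1]),
    query_subject_idx=[0, 1],
    n_cpu=2,
)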