Code example #1
File: conf_parser.py — Project: schwarty/pypreprocess
def _generate_preproc_pipeline(jobfile, dataset_dir=None,
                               options_callback=None, **kwargs):
    """
    Generate pipeline (i.e subject factor + preproc params) from
    config file.

    Parameters
    ----------
    jobfile: string
        path to the configuration file to parse
    dataset_dir: string or list of strings, optional (default None)
        directory (or list of directories) containing the dataset; if
        None, the value must be specified in the jobfile
    options_callback: callable, optional (default None)
        hook invoked on the parsed options dict; must return the
        (possibly modified) options dict
    **kwargs:
        forwarded verbatim to `_parse_job`

    Returns
    -------
    subjects: list of `SubjectData` objects
        subject list

    preproc_params: dict
        preproc parameters

    """

    # read config file
    jobfile = os.path.abspath(jobfile)
    options = _parse_job(jobfile, **kwargs)
    options = _del_nones_from_dict(options)

    # resolve dataset_dir: the function argument takes precedence over
    # the jobfile.  BUGFIX: the former `else` branch asserted
    # `dataset_dir is not None`, which is trivially true there (and its
    # message had an unbalanced parenthesis); it has been removed.
    if dataset_dir is None:
        assert "dataset_dir" in options, (
            "dataset_dir not specified (neither in jobfile"
            " nor in this function call)")
        dataset_dir = options["dataset_dir"]

    assert dataset_dir
    options["dataset_dir"] = dataset_dir

    # multiple dataset dirs: recurse once per directory and merge the
    # subject lists; preproc params are identical across recursions, so
    # keep the first
    if not isinstance(dataset_dir, basestring):
        tmp = [_generate_preproc_pipeline(
                jobfile, dataset_dir=dsd,
                options_callback=options_callback, **kwargs)
               for dsd in dataset_dir]

        subjects = [subject for x in tmp for subject in x[0]]

        return subjects, tmp[0][1]

    # invoke callback (it may rewrite options, including dataset_dir)
    if options_callback:
        options = options_callback(options)
        dataset_dir = options["dataset_dir"]

    dataset_dir = _expand_path(dataset_dir)
    assert os.path.isdir(dataset_dir), (
        "dataset_dir %s doesn't exist" % dataset_dir)

    # output dir
    output_dir = _expand_path(options["output_dir"],
                              relative_to=dataset_dir)
    if output_dir is None:
        raise RuntimeError(
            ("Could not expand 'output_dir' specified in %s: invalid"
             " path %s (relative to directory %s)") % (
                jobfile, options["output_dir"], dataset_dir))

    # dataset description
    dataset_description = options.get("dataset_description", None)

    # subject inclusion / exclusion configuration
    subjects = []
    nsubjects = options.get('nsubjects', np.inf)
    exclude_these_subject_ids = options.get(
        'exclude_these_subject_ids', [])
    include_only_these_subject_ids = options.get(
        'include_only_these_subject_ids', [])

    def _ignore_subject(subject_id):
        """
        Return True if the given subject_id should be skipped.

        """

        if subject_id in exclude_these_subject_ids:
            return True
        elif len(include_only_these_subject_ids
                 ) and subject_id not in include_only_these_subject_ids:
            return True
        else:
            return False

    # subject data factory
    subject_dir_wildcard = os.path.join(dataset_dir,
                                        options.get("subject_dirs",
                                                    "*"))
    sessions = [k for k in options.keys() if re.match("session_.+_func", k)]
    session_ids = [re.match("session_(.+)_func", session).group(1)
                   for session in sessions]
    assert len(sessions) > 0
    subject_data_dirs = sorted(glob.glob(subject_dir_wildcard))
    assert subject_data_dirs, (
        "No subject directories found for wildcard: %s" % (
            subject_dir_wildcard))
    for subject_data_dir in subject_data_dirs:
        if len(subjects) == nsubjects:
            # we've had enough subjects already
            break

        subject_id = os.path.basename(subject_data_dir)
        if _ignore_subject(subject_id):
            continue

        subject_output_dir = os.path.join(output_dir, subject_id)

        # grab functional data, one entry per session
        func = []
        sess_output_dirs = []
        skip_subject = False
        for session in sessions:
            session = options[session]
            sess_func_wildcard = os.path.join(subject_data_dir, session)
            sess_func = sorted(glob.glob(sess_func_wildcard))
            if not sess_func:
                print("subject %s: No func images found for"
                      " wildcard %s" % (subject_id, sess_func_wildcard))
                skip_subject = True
                break
            sess_dir = os.path.dirname(sess_func[0])
            if len(sess_func) == 1:
                # single (presumably 4D) image: store the filename itself
                sess_func = sess_func[0]
            func.append(sess_func)

            # session output dir
            if os.path.basename(sess_dir) != os.path.basename(
                subject_output_dir):
                sess_output_dir = os.path.join(
                    subject_output_dir, get_relative_path(subject_data_dir,
                                                          sess_dir))
            else:
                sess_output_dir = subject_output_dir
            if not os.path.exists(sess_output_dir):
                os.makedirs(sess_output_dir)
            sess_output_dirs.append(sess_output_dir)

        if skip_subject:
            # BUGFIX: use print as a function (was a py2-only print
            # statement, inconsistent with the rest of this function)
            print("Skipping subject %s" % subject_id)
            continue

        # grab anat
        anat = None
        if options.get("anat", None) is not None:
            anat_wildcard = os.path.join(subject_data_dir, options['anat'])
            anat = glob.glob(anat_wildcard)

            # skip subject if anat absent
            if len(anat) < 1:
                print(
                    "subject %s: anat image matching %s not found!; skipping"
                    " subject" % (subject_id, anat_wildcard))
                continue

            # only the first matching anat volume is used
            anat = anat[0]
            anat_dir = os.path.dirname(anat)
        else:
            anat = None
            anat_dir = ""

        # anat output dir
        anat_output_dir = None
        if anat_dir:
            anat_output_dir = os.path.join(subject_output_dir,
                                           get_relative_path(subject_data_dir,
                                                             anat_dir))

            if not os.path.exists(anat_output_dir):
                os.makedirs(anat_output_dir)

        # make subject data
        subject_data = SubjectData(subject_id=subject_id, func=func, anat=anat,
                                   output_dir=subject_output_dir,
                                   session_output_dirs=sess_output_dirs,
                                   anat_output_dir=anat_output_dir,
                                   session_id=session_ids,
                                   data_dir=subject_data_dir)

        subjects.append(subject_data)

    # BUGFIX: this message used to be printed unconditionally (even when
    # subjects *were* globbed) and lacked its closing parenthesis
    if not subjects:
        print("No subjects globbed (dataset_dir=%s, subject_dir_wildcard=%s)"
              % (dataset_dir, subject_dir_wildcard))

    # preproc parameters
    preproc_params = {
        "spm_dir": options.get("spm_dir", None),
        "matlab_exec": options.get("matlab_exec", None),
        "report": options.get("report", True),
        "output_dir": output_dir,
        "dataset_id": options.get("dataset_id", dataset_dir),
        "n_jobs": options.get("n_jobs", None),
        "caching": options.get("caching", True),
        "cv_tc": options.get("cv_tc", True),
        "dataset_description": dataset_description,
        "slice_timing_software": options.get("slice_timing_software", "spm"),
        "realign_software": options.get("realign_software", "spm"),
        "coregister_software": options.get("coregister_software", "spm"),
        }

    # delete orientation meta-data ?
    preproc_params['deleteorient'] = options.get(
        "deleteorient", False)

    # configure slice-timing correction node
    preproc_params["slice_timing"] = not options.get(
        "disable_slice_timing", False)
    # can't do STC without TR
    if preproc_params["slice_timing"]:
        preproc_params.update(dict((k, options.get(k, None))
                                   for k in ["TR", "TA", "slice_order",
                                             "interleaved"]))
        if preproc_params["TR"] is None:
            preproc_params["slice_timing"] = False

    # configure motion correction node
    preproc_params["realign"] = not options.get("disable_realign", False)
    if preproc_params["realign"]:
        preproc_params['realign_reslice'] = options.get("reslice_realign",
                                                        False)
        preproc_params['register_to_mean'] = options.get("register_to_mean",
                                                         True)

    # configure coregistration node
    preproc_params["coregister"] = not options.get("disable_coregister",
                                                   False)
    if preproc_params["coregister"]:
        preproc_params['coregister_reslice'] = options["coregister_reslice"]
        preproc_params['coreg_anat_to_func'] = not options.get(
            "coreg_func_to_anat", True)

    # configure tissue segmentation node
    preproc_params["segment"] = not options.get("disable_segment", False)
    if preproc_params["segment"]:
        pass  # XXX pending code...

    # configure normalization node
    preproc_params["normalize"] = not options.get(
        "disable_normalize", False)
    preproc_params['func_write_voxel_sizes'] = options.get(
        "func_voxel_sizes", [3, 3, 3])
    preproc_params['anat_write_voxel_sizes'] = options.get(
        "anat_voxel_sizes", [1, 1, 1])
    preproc_params['dartel'] = options.get("dartel", False)

    # configure smoothing node
    preproc_params["fwhm"] = options.get("fwhm", 0.)

    return subjects, preproc_params
Code example #2
File: conf_parser.py — Project: banilo/pypreprocess
def _generate_preproc_pipeline(config_file, dataset_dir=None, output_dir=None, options_callback=None, **kwargs):
    """
    Generate pipeline (i.e subject factor + preproc params) from
    config file.

    dataset_dir, and output_dir can be specified in config_file, passed as
    function argument, or exported in environ variables like so:
    OUTPUT_DIR=/some/dir, etc.

    Returns
    -------
    subjects: list of `SubjectData` objects
        subject list

    preproc_params: dict
        preproc parameters

    """
    # read config file
    config_file = os.path.abspath(config_file)
    options = _parse_job(config_file, **kwargs)
    options = _del_nones_from_dict(options)

    # sanitize output_dir and dataset_dir: environ beats function argument
    dataset_dir = os.environ.get("DATASET_DIR", dataset_dir)
    output_dir = os.environ.get("OUTPUT_DIR", output_dir)
    # BUGFIX: resolve the values through an explicit mapping instead of
    # eval(), which was both unsafe and needlessly obscure
    for item, val in [("dataset_dir", dataset_dir),
                      ("output_dir", output_dir)]:
        if val is None:
            if item not in options:
                # get value from environ (if exists)
                val = os.environ.get(item.upper(), val)
                if val is None:
                    raise ValueError(
                        "%s not specified (neither in environ variable, "
                        "nor in config_file nor in this function "
                        "call)" % item)

                # set value from environ
                if item == "dataset_dir":
                    dataset_dir = val
                elif item == "output_dir":
                    output_dir = val
    options["dataset_dir"] = dataset_dir = (
        dataset_dir if dataset_dir is not None else options["dataset_dir"])
    options["output_dir"] = output_dir = (
        output_dir if output_dir is not None else options["output_dir"])
    assert options["dataset_dir"]
    assert options["output_dir"]

    # load data from multiple dataset_dirs: recurse once per directory and
    # merge the subject lists (preproc params are identical, keep the first)
    if not isinstance(dataset_dir, basestring):
        kwargs["output_dir"] = output_dir
        tmp = [
            _generate_preproc_pipeline(config_file, dataset_dir=dsd, options_callback=options_callback, **kwargs)
            for dsd in dataset_dir
        ]
        subjects = [subject for x in tmp for subject in x[0]]
        return subjects, tmp[0][1]

    # invoke callback (it may rewrite options)
    if options_callback:
        options = options_callback(options)
        if dataset_dir is None:
            dataset_dir = options.get("dataset_dir", None)
        if output_dir is None:
            output_dir = options.get("output_dir", None)

    # check dataset_dir
    dataset_dir = _expand_path(dataset_dir)
    if not os.path.isdir(dataset_dir):
        raise OSError("dataset_dir %s doesn't exist" % dataset_dir)

    # check output_dir
    output_dir = _expand_path(options["output_dir"], relative_to=dataset_dir)
    if output_dir is None:
        raise OSError(
            ("Could not expand 'output_dir' specified in %s: invalid"
             " path %s (relative to directory %s)")
            % (config_file, options["output_dir"], dataset_dir)
        )

    # dataset description
    dataset_description = options.get("dataset_description", None)

    # preproc parameters
    preproc_params = {
        "spm_dir": options.get("spm_dir", None),
        "matlab_exec": options.get("matlab_exec", None),
        "report": options.get("report", True),
        "output_dir": output_dir,
        "dataset_id": options.get("dataset_id", dataset_dir),
        "n_jobs": options.get("n_jobs", None),
        "caching": options.get("caching", True),
        "tsdiffana": options.get("tsdiffana", True),
        "dataset_description": dataset_description,
        "slice_timing_software": options.get("slice_timing_software", "spm"),
        "realign_software": options.get("realign_software", "spm"),
        "coregister_software": options.get("coregister_software", "spm"),
        "smooth_software": options.get("smooth_software", "spm"),
    }

    # delete orientation meta-data ?
    preproc_params["deleteorient"] = options.get("deleteorient", False)

    # configure slice-timing correction node
    preproc_params["slice_timing"] = not options.get("disable_slice_timing", False)
    # can't do STC without TR
    if preproc_params["slice_timing"]:
        preproc_params.update(dict((k, options.get(k, None)) for k in ["TR", "TA", "slice_order", "interleaved"]))
        if preproc_params["TR"] is None:
            preproc_params["slice_timing"] = False

    # configure motion correction node
    preproc_params["realign"] = not options.get("disable_realign", False)
    if preproc_params["realign"]:
        preproc_params["realign_reslice"] = options.get("reslice_realign", False)
        preproc_params["register_to_mean"] = options.get("register_to_mean", True)

    # configure coregistration node
    preproc_params["coregister"] = not options.get("disable_coregister", False)
    if preproc_params["coregister"]:
        preproc_params["coregister_reslice"] = options.get("coregister_reslice")
        preproc_params["coreg_anat_to_func"] = not options.get("coreg_func_to_anat", True)

    # configure tissue segmentation node
    preproc_params["segment"] = not options.get("disable_segment", False)
    preproc_params["newsegment"] = options.get("newsegment", False) and preproc_params["segment"]

    # configure normalization node
    preproc_params["normalize"] = not options.get("disable_normalize", False)

    # configure output voxel sizes: accept the singular key as an alias
    # for the plural one, but never both
    for brain in ["func", "anat"]:
        k = "%s_write_voxel_size" % brain
        ks = k + "s"
        if k in options:
            # BUGFIX: the assertion message lacked its % arguments (and
            # contained the typo "thesame")
            assert ks not in options, (
                "Both %s and %s specified in ini file. Please use only one of"
                " them, they mean the same thing!" % (k, ks))
            options[ks] = options.pop(k)
        preproc_params[ks] = options.get(ks, [[3, 3, 3], [1, 1, 1]][brain == "anat"])

    # configure dartel
    preproc_params["dartel"] = options.get("dartel", False)
    preproc_params["output_modulated_tpms"] = options.get("output_modulated_tpms", False)

    # can't do dartel without newsegment!
    if not preproc_params["newsegment"]:
        preproc_params["newsegment"] = preproc_params["dartel"]

    # configure smoothing node
    preproc_params["fwhm"] = options.get("fwhm", 0.0)
    preproc_params["anat_fwhm"] = options.get("anat_fwhm", 0.0)

    # how many subjects ?
    subjects = []
    nsubjects = options.get("nsubjects", np.inf)
    exclude_these_subject_ids = options.get("exclude_these_subject_ids", [])
    include_only_these_subject_ids = options.get("include_only_these_subject_ids", [])

    def _ignore_subject(subject_id):
        """
        Return True if the given subject_id should be skipped.

        """
        if subject_id in exclude_these_subject_ids:
            return True
        elif len(include_only_these_subject_ids) and subject_id not in include_only_these_subject_ids:
            return True
        else:
            return False

    # subject data factory
    subject_dir_wildcard = os.path.join(dataset_dir, options.get("subject_dirs", "*"))
    sess_func_wildcards = [k for k in options.keys() if re.match("session_.+_func", k)]
    sess_onset_wildcards = [k for k in options.keys() if re.match("session_.+_onset", k)]
    sess_ids = [re.match("session_(.+)_func", session).group(1) for session in sess_func_wildcards]
    subject_data_dirs = [x for x in sorted(glob.glob(subject_dir_wildcard)) if os.path.isdir(x)]
    if not subject_data_dirs:
        warnings.warn("No subject directories found for wildcard: %s" % (subject_dir_wildcard))
        return [], preproc_params

    for subject_data_dir in subject_data_dirs:
        if len(subjects) == nsubjects:
            # we've had enough subjects already; end
            break
        subject_id = os.path.basename(subject_data_dir)
        if _ignore_subject(subject_id):
            continue
        subject_output_dir = os.path.join(output_dir, subject_id)

        # grab functional data (and matching onset files), one entry per
        # session
        func = []
        sess_output_dirs = []
        # NOTE(review): skip_subject is never set True in this version
        # (empty sessions are skipped with `continue`); kept for parity
        # with the other variants of this function
        skip_subject = False
        onset = []
        for s, sess_func_wildcard in enumerate(sess_func_wildcards):
            o = None
            if s < len(sess_onset_wildcards):
                sess_onset_wildcard = sess_onset_wildcards[s]
                sess_onset_wildcard = options[sess_onset_wildcard]
                sess_onset_wildcard = os.path.join(subject_data_dir, sess_onset_wildcard)
                sess_onset = sorted(glob.glob(sess_onset_wildcard))
                if len(sess_onset) > 1:
                    # BUGFIX: the bare `raise ValueError` carried no message
                    raise ValueError(
                        "subject %s: multiple onset files match wildcard"
                        " %s" % (subject_id, sess_onset_wildcard))
                if len(sess_onset) > 0:
                    o = sess_onset[0]
            onset.append(o)
            sess_func_wildcard = options[sess_func_wildcard]
            sess_func_wildcard = os.path.join(subject_data_dir, sess_func_wildcard)
            sess_func = sorted(glob.glob(sess_func_wildcard))

            # skip session if no data found
            if not sess_func:
                warnings.warn(
                    ("subject %s: No func images found for" " wildcard %s" % (subject_id, sess_func_wildcard))
                )
                continue
            sess_dir = os.path.dirname(sess_func[0])
            if len(sess_func) == 1:
                # single (presumably 4D) image: store the filename itself
                sess_func = sess_func[0]
            func.append(sess_func)

            # session output dir
            if os.path.basename(sess_dir) != os.path.basename(subject_output_dir):
                sess_output_dir = os.path.join(subject_output_dir, get_relative_path(subject_data_dir, sess_dir))
            else:
                sess_output_dir = subject_output_dir
            if not os.path.exists(sess_output_dir):
                os.makedirs(sess_output_dir)
            sess_output_dirs.append(sess_output_dir)

        # something is wrong with this guy, skip
        if skip_subject:
            warnings.warn("Skipping subject %s" % subject_id)
            continue

        # grab anat
        anat = None
        if options.get("anat", None) is not None:
            # grab anat file(s)
            anat_wildcard = os.path.join(subject_data_dir, options["anat"])
            anat = glob.glob(anat_wildcard)

            # skip subject if anat absent
            if len(anat) < 1:
                print(
                    "subject %s: anat image matching %s not found!; skipping"
                    " subject" % (subject_id, anat_wildcard)
                )
                continue

            # we need just 1 anat volume
            anat = anat[0]
            anat_dir = os.path.dirname(anat)
        else:
            anat = None
            anat_dir = ""

        # anat output dir
        anat_output_dir = None
        if anat_dir:
            anat_output_dir = os.path.join(subject_output_dir, get_relative_path(subject_data_dir, anat_dir))
            if not os.path.exists(anat_output_dir):
                os.makedirs(anat_output_dir)

        # make subject data
        subject_data = SubjectData(
            subject_id=subject_id,
            func=func,
            anat=anat,
            output_dir=subject_output_dir,
            session_output_dirs=sess_output_dirs,
            anat_output_dir=anat_output_dir,
            session_id=sess_ids,
            data_dir=subject_data_dir,
            onset=onset,
            TR=options.get("TR", None),
            drift_model="Cosine",
            hrf_model=options.get("hrf_model", "Canonical With Derivative"),
            hfcut=options.get("hfcut", 128.0),
            time_units=options.get("time_units", "seconds"),
        )
        subjects.append(subject_data)

    if not subjects:
        warnings.warn(
            "No subjects globbed (dataset_dir=%s, subject_dir_wildcard=%s" % (dataset_dir, subject_dir_wildcard)
        )

    return subjects, preproc_params
Code example #3
def _generate_preproc_pipeline(config_file,
                               dataset_dir=None,
                               output_dir=None,
                               scratch=None,
                               options_callback=None,
                               **kwargs):
    """
    Generate pipeline (i.e subject factor + preproc params) from
    config file.

    dataset_dir, and output_dir can be specified in config_file, passed as
    function argument, or exported in environ variables like so:
    OUTPUT_DIR=/some/dir, etc.

    Returns
    -------
    subjects: list of `SubjectData` objects
        subject list

    preproc_params: dict
        preproc parameters

    """
    # read config file
    config_file = os.path.abspath(config_file)
    options = _parse_job(config_file, **kwargs)
    options = _del_nones_from_dict(options)

    # sanitize output_dir and dataset_dir: environ beats function argument
    dataset_dir = os.environ.get("DATASET_DIR", dataset_dir)
    output_dir = os.environ.get("OUTPUT_DIR", output_dir)
    scratch = os.environ.get("SCRATCH", scratch)
    # BUGFIX: resolve the values through an explicit mapping instead of
    # eval(), which was both unsafe and needlessly obscure
    for item, val in [("dataset_dir", dataset_dir),
                      ("output_dir", output_dir),
                      ("scratch", scratch)]:
        if val is None:
            if item not in options:
                # get value from environ (if exists)
                val = os.environ.get(item.upper(), val)
                # scratch is optional; the other two are mandatory
                if val is None and item in ["dataset_dir", "output_dir"]:
                    raise ValueError(
                        ("%s not specified (neither in environ variable, "
                         "nor in config_file nor in this function "
                         "call)") % item)

                # set value from environ
                if item == "dataset_dir":
                    dataset_dir = val
                elif item == "output_dir":
                    output_dir = val
                elif item == "scratch":
                    # BUGFIX: the environ value was looked up but never
                    # assigned back to scratch
                    scratch = val
    options["dataset_dir"] = dataset_dir = (
        dataset_dir if dataset_dir is not None else options["dataset_dir"])
    options["output_dir"] = output_dir = (output_dir if output_dir is not None
                                          else options["output_dir"])
    # scratch defaults to output_dir when unspecified everywhere
    options["scratch"] = scratch = (scratch if scratch is not None else
                                    options.get("scratch", output_dir))
    assert options["dataset_dir"]
    assert options["output_dir"]

    # load data from multiple dataset_dirs: recurse once per directory and
    # merge the subject lists (preproc params are identical, keep the first)
    if not isinstance(dataset_dir, basestring):
        kwargs["output_dir"] = output_dir
        tmp = [
            _generate_preproc_pipeline(config_file,
                                       dataset_dir=dsd,
                                       options_callback=options_callback,
                                       **kwargs) for dsd in dataset_dir
        ]
        subjects = [subject for x in tmp for subject in x[0]]
        return subjects, tmp[0][1]

    # invoke callback (it may rewrite options)
    if options_callback:
        options = options_callback(options)
        if dataset_dir is None:
            dataset_dir = options.get("dataset_dir", None)
        if output_dir is None:
            output_dir = options.get("output_dir", None)
        if scratch is None:
            # BUGFIX: this branch used to clobber output_dir instead of
            # assigning scratch
            scratch = options.get("scratch", None)

    # check dataset_dir
    dataset_dir = _expand_path(dataset_dir)
    if not os.path.isdir(dataset_dir):
        raise OSError("dataset_dir '%s' doesn't exist" % dataset_dir)

    # check output_dir
    output_dir = _expand_path(options["output_dir"], relative_to=dataset_dir)
    scratch = _expand_path(options["scratch"], relative_to=dataset_dir)
    if output_dir is None:
        raise OSError(("Could not expand 'output_dir' specified in %s: invalid"
                       " path %s (relative to directory %s)") %
                      (config_file, options["output_dir"], dataset_dir))

    # dataset description
    dataset_description = options.get("dataset_description", None)

    # preproc parameters
    preproc_params = {
        "spm_dir": options.get("spm_dir", None),
        "matlab_exec": options.get("matlab_exec", None),
        "report": options.get("report", True),
        "output_dir": output_dir,
        "scratch": scratch,
        "dataset_id": options.get("dataset_id", dataset_dir),
        "n_jobs": options.get("n_jobs", None),
        "caching": options.get("caching", True),
        "tsdiffana": options.get("tsdiffana", True),
        "dataset_description": dataset_description,
        "slice_timing_software": options.get("slice_timing_software", "spm"),
        "realign_software": options.get("realign_software", "spm"),
        "coregister_software": options.get("coregister_software", "spm"),
        "smooth_software": options.get("smooth_software", "spm")
    }

    # delete orientation meta-data ?
    preproc_params['deleteorient'] = options.get("deleteorient", False)

    # configure slice-timing correction node
    preproc_params["slice_timing"] = not options.get("disable_slice_timing",
                                                     False)
    # can't do STC without TR
    if preproc_params["slice_timing"]:
        preproc_params.update(
            dict((k, options.get(k, None))
                 for k in ["TR", "TA", "slice_order", "interleaved"]))
        if preproc_params["TR"] is None:
            preproc_params["slice_timing"] = False

    # configure motion correction node
    preproc_params["realign"] = not options.get("disable_realign", False)
    if preproc_params["realign"]:
        preproc_params['realign_reslice'] = options.get(
            "reslice_realign", False)
        preproc_params['register_to_mean'] = options.get(
            "register_to_mean", True)

    # configure coregistration node
    preproc_params["coregister"] = not options.get("disable_coregister", False)
    if preproc_params["coregister"]:
        preproc_params['coregister_reslice'] = options.get(
            "coregister_reslice")
        preproc_params['coreg_anat_to_func'] = not options.get(
            "coreg_func_to_anat", True)

    # configure tissue segmentation node
    preproc_params["segment"] = not options.get("disable_segment", False)
    preproc_params["newsegment"] = options.get(
        "newsegment", False) and preproc_params["segment"]

    # configure normalization node
    preproc_params["normalize"] = not options.get("disable_normalize", False)

    # configure output voxel sizes: accept the singular key as an alias
    # for the plural one, but never both
    for brain in ["func", "anat"]:
        k = "%s_write_voxel_size" % brain
        ks = k + "s"
        if k in options:
            # BUGFIX: the assertion message lacked its % arguments (and
            # contained the typo "thesame")
            assert ks not in options, (
                "Both %s and %s specified in ini file. Please use only one of"
                " them, they mean the same thing!" % (k, ks))
            options[ks] = options.pop(k)
        preproc_params[ks] = options.get(ks, [[3, 3, 3], [1, 1,
                                                          1]][brain == "anat"])

    # configure dartel
    preproc_params['dartel'] = options.get("dartel", False)
    preproc_params['output_modulated_tpms'] = options.get(
        "output_modulated_tpms", False)

    # can't do dartel without newsegment!
    if not preproc_params["newsegment"]:
        preproc_params["newsegment"] = preproc_params["dartel"]

    # configure smoothing node
    preproc_params["fwhm"] = options.get("fwhm", 0.)
    preproc_params["anat_fwhm"] = options.get("anat_fwhm", 0.)

    # how many subjects ?
    subjects = []
    nsubjects = options.get('nsubjects', np.inf)
    exclude_these_subject_ids = options.get('exclude_these_subject_ids', [])
    include_only_these_subject_ids = options.get(
        'include_only_these_subject_ids', [])

    def _ignore_subject(subject_id):
        """
        Return True if the given subject_id should be skipped.

        """
        if subject_id in exclude_these_subject_ids:
            return True
        elif len(include_only_these_subject_ids
                 ) and subject_id not in include_only_these_subject_ids:
            return True
        else:
            return False

    # subject data factory: subject_dirs may be a single wildcard or a
    # list of wildcards/directories
    subject_data_dirs = options.get("subject_dirs", "*")
    if isinstance(subject_data_dirs, basestring):
        subject_dir_wildcard = os.path.join(dataset_dir, subject_data_dirs)
        subject_data_dirs = [
            x for x in sorted(glob.glob(subject_dir_wildcard))
            if os.path.isdir(x)
        ]
    else:
        # list of subjects or subject wildcards
        subject_data_dirs = [
            os.path.join(dataset_dir, x) for x in subject_data_dirs
        ]
        subject_dir_wildcard = subject_data_dirs
        aux = []
        for subject_data_dir in subject_data_dirs:
            for x in sorted(glob.glob(subject_data_dir)):
                if os.path.isdir(x):
                    aux.append(x)
        subject_data_dirs = aux
    sess_func_wildcards = [
        key for key in options.keys() if re.match("session_.+_func", key)
    ]
    sess_onset_wildcards = [
        key for key in options.keys() if re.match("session_.+_onset", key)
    ]
    sess_ids = [
        re.match("session_(.+)_func", session).group(1)
        for session in sess_func_wildcards
    ]
    if not subject_data_dirs:
        warnings.warn("No subject directories found for wildcard: %s" %
                      (subject_dir_wildcard))
        return [], preproc_params

    for subject_data_dir in subject_data_dirs:
        if len(subjects) == nsubjects:
            # we've had enough subjects already; end
            break
        subject_id = os.path.basename(subject_data_dir)
        if _ignore_subject(subject_id):
            continue
        subject_output_dir = os.path.join(output_dir, subject_id)
        if scratch is not None:
            subject_scratch = os.path.join(scratch, subject_id)
        else:
            subject_scratch = None

        # grab functional data (and matching onset files), one entry per
        # session
        func = []
        sess_output_dirs = []
        # NOTE(review): skip_subject is never set True in this version
        # (empty sessions are skipped with `continue`); kept for parity
        # with the other variants of this function
        skip_subject = False
        onset = []
        for s, sess_func_wildcard in enumerate(sess_func_wildcards):
            o = None
            if s < len(sess_onset_wildcards):
                sess_onset_wildcard = sess_onset_wildcards[s]
                sess_onset_wildcard = options[sess_onset_wildcard]
                sess_onset_wildcard = os.path.join(subject_data_dir,
                                                   sess_onset_wildcard)
                sess_onset = sorted(glob.glob(sess_onset_wildcard))
                if len(sess_onset) > 1:
                    # BUGFIX: the bare `raise ValueError` carried no message
                    raise ValueError(
                        "subject %s: multiple onset files match wildcard"
                        " %s" % (subject_id, sess_onset_wildcard))
                if len(sess_onset) > 0:
                    o = sess_onset[0]
            onset.append(o)
            sess_func_wildcard = options[sess_func_wildcard]
            sess_func_wildcard = os.path.join(subject_data_dir,
                                              sess_func_wildcard)
            sess_func = sorted(glob.glob(sess_func_wildcard))

            # skip session if no data found
            if not sess_func:
                warnings.warn(
                    ("subject %s: No func images found for"
                     " wildcard %s" % (subject_id, sess_func_wildcard)))
                continue
            sess_dir = os.path.dirname(sess_func[0])
            if len(sess_func) == 1:
                # single (presumably 4D) image: store the filename itself
                sess_func = sess_func[0]
            func.append(sess_func)

            # session output dir
            if os.path.basename(sess_dir) != os.path.basename(
                    subject_output_dir):
                sess_output_dir = os.path.join(
                    subject_output_dir,
                    get_relative_path(subject_data_dir, sess_dir))
            else:
                sess_output_dir = subject_output_dir
            if not os.path.exists(sess_output_dir):
                os.makedirs(sess_output_dir)
            sess_output_dirs.append(sess_output_dir)

        # something is wrong with this guy, skip
        if skip_subject:
            warnings.warn("Skipping subject %s" % subject_id)
            continue

        # grab anat
        anat = None
        if options.get("anat", None) is not None:
            # grab anat file(s)
            anat_wildcard = os.path.join(subject_data_dir, options['anat'])
            anat = glob.glob(anat_wildcard)

            # skip subject if anat absent
            if len(anat) < 1:
                print(
                    "subject %s: anat image matching %s not found!; skipping"
                    " subject" % (subject_id, anat_wildcard))
                continue

            # we need just 1 anat volume
            anat = anat[0]
            anat_dir = os.path.dirname(anat)
        else:
            anat = None
            anat_dir = ""

        # anat output dir
        anat_output_dir = None
        if anat_dir:
            anat_output_dir = os.path.join(
                subject_output_dir,
                get_relative_path(subject_data_dir, anat_dir))
            if not os.path.exists(anat_output_dir):
                os.makedirs(anat_output_dir)

        # make subject data
        subject_data = SubjectData(
            subject_id=subject_id,
            func=func,
            anat=anat,
            output_dir=subject_output_dir,
            scratch=subject_scratch,
            session_output_dirs=sess_output_dirs,
            anat_output_dir=anat_output_dir,
            session_id=sess_ids,
            data_dir=subject_data_dir,
            onset=onset,
            TR=options.get('TR', None),
            drift_model='Cosine',
            hrf_model=options.get('hrf_model', 'spm + derivative'),
            hfcut=options.get("hfcut", 128.),
            time_units=options.get("time_units", "seconds"))
        subjects.append(subject_data)

    if not subjects:
        warnings.warn(
            "No subjects globbed (dataset_dir=%s, subject_dir_wildcard=%s" %
            (dataset_dir, subject_dir_wildcard))

    return subjects, preproc_params
Code example #4
def _generate_preproc_pipeline(jobfile,
                               dataset_dir=None,
                               options_callback=None,
                               **kwargs):
    """
    Generate pipeline (i.e subject factory + preproc params) from
    config file.

    Parameters
    ----------
    jobfile: string
        path to the .ini configuration file describing the job
    dataset_dir: string or list of strings, optional (default None)
        dataset directory (or directories); if None, the value must be
        present in the jobfile under the "dataset_dir" key
    options_callback: callable, optional (default None)
        hook applied to the parsed options dict before it is used; must
        return the (possibly modified) options dict
    **kwargs:
        additional keyword arguments forwarded to the jobfile parser

    Returns
    -------
    subjects: list of `SubjectData` objects
        subject list

    preproc_params: dict
        preproc parameters

    """

    # read config file and drop unset (None) entries
    jobfile = os.path.abspath(jobfile)
    options = _parse_job(jobfile, **kwargs)
    options = _del_nones_from_dict(options)

    # resolve dataset_dir: the explicit argument wins over the jobfile value
    if dataset_dir is None:
        assert "dataset_dir" in options, (
            "dataset_dir not specified (neither in jobfile"
            " nor in this function call)")
        dataset_dir = options["dataset_dir"]
    options["dataset_dir"] = dataset_dir

    # several dataset dirs: recurse on each one and pool the subjects;
    # the preproc params are identical across recursions, so keep the first
    if not isinstance(dataset_dir, basestring):
        tmp = [
            _generate_preproc_pipeline(jobfile,
                                       dataset_dir=dsd,
                                       options_callback=options_callback,
                                       **kwargs) for dsd in dataset_dir
        ]
        subjects = [subject for x in tmp for subject in x[0]]
        return subjects, tmp[0][1]

    # give the caller a chance to rewrite the options (may change
    # dataset_dir)
    if options_callback:
        options = options_callback(options)
        dataset_dir = options["dataset_dir"]

    dataset_dir = _expand_path(dataset_dir)
    assert os.path.isdir(dataset_dir), ("dataset_dir %s doesn't exist" %
                                        dataset_dir)

    # output dir
    output_dir = _expand_path(options["output_dir"], relative_to=dataset_dir)
    if output_dir is None:
        raise RuntimeError(
            ("Could not expand 'output_dir' specified in %s: invalid"
             " path %s (relative to directory %s)") %
            (jobfile, options["output_dir"], dataset_dir))

    # dataset description
    dataset_description = options.get("dataset_description", None)

    # preproc parameters
    preproc_params = {
        "spm_dir": options.get("spm_dir", None),
        "matlab_exec": options.get("matlab_exec", None),
        "report": options.get("report", True),
        "output_dir": output_dir,
        "dataset_id": options.get("dataset_id", dataset_dir),
        "n_jobs": options.get("n_jobs", None),
        "caching": options.get("caching", True),
        "cv_tc": options.get("cv_tc", True),
        "dataset_description": dataset_description,
        "slice_timing_software": options.get("slice_timing_software", "spm"),
        "realign_software": options.get("realign_software", "spm"),
        "coregister_software": options.get("coregister_software", "spm"),
    }

    # delete orientation meta-data ?
    preproc_params['deleteorient'] = options.get("deleteorient", False)

    # configure slice-timing correction node
    preproc_params["slice_timing"] = not options.get("disable_slice_timing",
                                                     False)
    if preproc_params["slice_timing"]:
        preproc_params.update(
            dict((key, options.get(key, None))
                 for key in ["TR", "TA", "slice_order", "interleaved"]))
        # can't do STC without a TR
        if preproc_params["TR"] is None:
            preproc_params["slice_timing"] = False

    # configure motion correction node
    preproc_params["realign"] = not options.get("disable_realign", False)
    if preproc_params["realign"]:
        preproc_params['realign_reslice'] = options.get(
            "reslice_realign", False)
        preproc_params['register_to_mean'] = options.get(
            "register_to_mean", True)

    # configure coregistration node
    preproc_params["coregister"] = not options.get("disable_coregister", False)
    if preproc_params["coregister"]:
        preproc_params['coregister_reslice'] = options["coregister_reslice"]
        preproc_params['coreg_anat_to_func'] = not options.get(
            "coreg_func_to_anat", True)

    # configure tissue segmentation node
    preproc_params["segment"] = not options.get("disable_segment", False)
    if preproc_params["segment"]:
        pass  # XXX pending code...

    # configure normalization node
    preproc_params["normalize"] = not options.get("disable_normalize", False)

    # configure output voxel sizes ("<brain>_write_voxel_size" is accepted
    # as an alias for "<brain>_write_voxel_sizes", but not both at once)
    for brain in ["func", "anat"]:
        k = "%s_write_voxel_size" % brain
        ks = k + "s"
        if k in options:
            # BUGFIX: the message previously had unfilled %s placeholders
            assert ks not in options, (
                "Both %s and %s specified in ini file. Please use only one of "
                "them, they mean the same thing!" % (k, ks))
            options[ks] = options.pop(k)
        default_voxel_sizes = [1, 1, 1] if brain == "anat" else [3, 3, 3]
        preproc_params[ks] = options.get(ks, default_voxel_sizes)

    # configure dartel
    preproc_params['dartel'] = options.get("dartel", False)
    preproc_params['output_modulated_tpms'] = options.get(
        "output_modulated_tpms", False)

    # configure smoothing node
    preproc_params["fwhm"] = options.get("fwhm", 0.)
    preproc_params["anat_fwhm"] = options.get("anat_fwhm", 0.)

    # subject inclusion / exclusion configuration
    subjects = []
    nsubjects = options.get('nsubjects', np.inf)
    exclude_these_subject_ids = options.get('exclude_these_subject_ids', [])
    include_only_these_subject_ids = options.get(
        'include_only_these_subject_ids', [])

    def _ignore_subject(subject_id):
        """Should the given subject_id be skipped ?"""
        if subject_id in exclude_these_subject_ids:
            return True
        if include_only_these_subject_ids and (
                subject_id not in include_only_these_subject_ids):
            return True
        return False

    # subject data factory: glob subject directories and session keys
    subject_dir_wildcard = os.path.join(dataset_dir,
                                        options.get("subject_dirs", "*"))
    sessions = [key for key in options.keys()
                if re.match("session_.+_func", key)]
    session_ids = [
        re.match("session_(.+)_func", session).group(1) for session in sessions
    ]
    subject_data_dirs = sorted(glob.glob(subject_dir_wildcard))
    if not subject_data_dirs:
        warnings.warn("No subject directories found for wildcard: %s" %
                      (subject_dir_wildcard))
        return [], preproc_params

    for subject_data_dir in subject_data_dirs:
        if len(subjects) == nsubjects:
            break

        subject_id = os.path.basename(subject_data_dir)
        if _ignore_subject(subject_id):
            continue

        subject_output_dir = os.path.join(output_dir, subject_id)

        # grab functional data, one entry per session
        func = []
        sess_output_dirs = []
        skip_subject = False
        for session in sessions:
            session = options[session]
            sess_func_wildcard = os.path.join(subject_data_dir, session)
            sess_func = sorted(glob.glob(sess_func_wildcard))
            if not sess_func:
                print("subject %s: No func images found for"
                      " wildcard %s" % (subject_id, sess_func_wildcard))
                skip_subject = True
                break
            sess_dir = os.path.dirname(sess_func[0])
            # a single-volume session is stored as a plain path, not a list
            if len(sess_func) == 1:
                sess_func = sess_func[0]
            func.append(sess_func)

            # session output dir: mirror the session sub-directory layout
            # under the subject output dir
            if os.path.basename(sess_dir) != os.path.basename(
                    subject_output_dir):
                sess_output_dir = os.path.join(
                    subject_output_dir,
                    get_relative_path(subject_data_dir, sess_dir))
            else:
                sess_output_dir = subject_output_dir
            if not os.path.exists(sess_output_dir):
                os.makedirs(sess_output_dir)
            sess_output_dirs.append(sess_output_dir)

        if skip_subject:
            print("Skipping subject %s" % subject_id)
            continue

        # grab anat
        anat = None
        if options.get("anat") is not None:
            anat_wildcard = os.path.join(subject_data_dir, options['anat'])
            anat = glob.glob(anat_wildcard)

            # skip subject if anat absent
            if len(anat) < 1:
                print(
                    "subject %s: anat image matching %s not found!; skipping"
                    " subject" % (subject_id, anat_wildcard))
                continue

            # we need just one anat volume
            anat = anat[0]
            anat_dir = os.path.dirname(anat)
        else:
            anat_dir = ""

        # anat output dir
        anat_output_dir = None
        if anat_dir:
            anat_output_dir = os.path.join(
                subject_output_dir,
                get_relative_path(subject_data_dir, anat_dir))
            if not os.path.exists(anat_output_dir):
                os.makedirs(anat_output_dir)

        # make subject data
        subject_data = SubjectData(subject_id=subject_id,
                                   func=func,
                                   anat=anat,
                                   output_dir=subject_output_dir,
                                   session_output_dirs=sess_output_dirs,
                                   anat_output_dir=anat_output_dir,
                                   session_id=session_ids,
                                   data_dir=subject_data_dir)
        subjects.append(subject_data)

    if not subjects:
        warnings.warn(
            "No subjects globbed (dataset_dir=%s, subject_dir_wildcard=%s)" %
            (dataset_dir, subject_dir_wildcard))

    return subjects, preproc_params