Python Workflow Exemples, pydra.Workflow Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : wf_task.py Projet : chasejohnson3/pydra_sandbox

import time
import uuid

import attr
from nipype.interfaces.base import (
    Directory,
    File,
    InputMultiPath,
    OutputMultiPath,
    traits,
)
from pydra import ShellCommandTask
from pydra.engine.specs import SpecInfo, ShellSpec, File


# Workflow declaring three inputs: two command names and one argument string.
# NOTE(review): the visible imports only bring in `ShellCommandTask` from
# pydra; the bare name `pydra` used here needs `import pydra` -- confirm it is
# imported elsewhere in the file.
wf = pydra.Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"])

# Give each declared workflow input a concrete value.
wf.inputs.cmd1 = "touch"
wf.inputs.cmd2 = "cp"
wf.inputs.args = "newfile.txt"

my_input_spec1 = SpecInfo(
    name="Input",
    fields=[
        (
            "file",
            attr.ib(
                type=str,
                metadata={
                    "output_file_template": "{args}",
                    "help_string": "output file",

Exemple #2

0

Afficher le fichier

            "landmarkWeights":
            "/Shared/sinapse/CACHE/20200915_PREDICTHD_base_CACHE/Atlas/20141004_BCD/template_weights_50Lmks.wts",
            "landmarks":
            "/Shared/sinapse/CACHE/20200915_PREDICTHD_base_CACHE/Atlas/20141004_BCD/template_landmarks_50Lmks.fcsv",
        },
        "out": {
            "output_dir": "/localscratch/Users/cjohnson30/output_dir"
        },
    }

    nest_asyncio.apply()

    # Create the inputs to the workflow
    wf = pydra.Workflow(name="wf",
                        input_spec=[
                            "t1", "templateModel", "llsModel",
                            "landmarkWeights", "landmarks", "output_dir"
                        ],
                        output_spec=["output_dir"])

    wf.split("t1", t1=[subject1_json["in"]["t1"], subject2_json["in"]["t1"]])
    #t1=["/localscratch/Users/cjohnson30/BCD_Practice/t1w_examples2/sub-066260_ses-21713_run-002_T1w.nii.gz",
    #                       "/localscratch/Users/cjohnson30/BCD_Practice/t1w_examples2/sub-066217_ses-29931_run-003_T1w.nii.gz"])

    print(wf.inputs.t1)
    # Set the inputs of Resample
    resample = BRAINSResample("BRAINSResample").get_task()
    resample.inputs.inputVolume = wf.lzin.t1  #wf.BRAINSConstellationDetector.lzout.outputResampledVolume
    resample.inputs.interpolationMode = "Linear"
    resample.inputs.pixelType = "binary"
    resample.inputs.referenceVolume = "/localscratch/Users/cjohnson30/resample_refs/t1_average_BRAINSABC.nii.gz"
    resample.inputs.warpTransform = "/localscratch/Users/cjohnson30/resample_refs/atlas_to_subject.h5"

Exemple #3

0

Afficher le fichier

def gen_workflow(inputs, cache_dir=None, cache_locations=None):
    """Assemble (but do not run) the ``ml_wf`` pydra workflow.

    Every key of ``inputs`` is declared as a workflow input and its value is
    handed to the ``pydra.Workflow`` constructor. The workflow is split over
    ``clf_info`` and ``permute`` and wires five tasks together: ``readcsv``,
    ``gensplit``, ``fit_clf`` (split over ``split_index``), ``metric`` and
    ``shap`` (both combined over ``fit_clf.split_index``).

    Args:
        inputs: Mapping of workflow input names to their values.
        cache_dir: Forwarded to ``pydra.Workflow`` as ``cache_dir``.
        cache_locations: Forwarded to ``pydra.Workflow`` as
            ``cache_locations``.

    Returns:
        The configured workflow, exposing ``output``, ``score``, ``shaps``
        and ``feature_names`` as its outputs.
    """
    wf = pydra.Workflow(
        name="ml_wf",
        input_spec=list(inputs),
        **inputs,
        cache_dir=cache_dir,
        cache_locations=cache_locations,
        audit_flags=AuditFlag.ALL,
        messengers=FileMessenger(),
        messenger_args={"message_dir": os.path.join(os.getcwd(), "messages")},
    )
    wf.split(["clf_info", "permute"])

    # readcsv: fed only from workflow inputs.
    reader = read_file_pdt(
        name="readcsv",
        filename=wf.lzin.filename,
        x_indices=wf.lzin.x_indices,
        target_vars=wf.lzin.target_vars,
        group=wf.lzin.group_var,
    )
    wf.add(reader)

    # gensplit: consumes the X / Y / groups outputs of readcsv.
    splitter = gen_splits_pdt(
        name="gensplit",
        n_splits=wf.lzin.n_splits,
        test_size=wf.lzin.test_size,
        X=wf.readcsv.lzout.X,
        Y=wf.readcsv.lzout.Y,
        groups=wf.readcsv.lzout.groups,
    )
    wf.add(splitter)

    # fit_clf: one run per split index produced by gensplit.
    fitter = train_test_kernel_pdt(
        name="fit_clf",
        X=wf.readcsv.lzout.X,
        y=wf.readcsv.lzout.Y,
        train_test_split=wf.gensplit.lzout.splits,
        split_index=wf.gensplit.lzout.split_indices,
        clf_info=wf.lzin.clf_info,
        permute=wf.lzin.permute,
    )
    wf.add(fitter)
    wf.fit_clf.split("split_index")

    # metric: evaluated on the fit_clf output, then gathered across splits.
    scorer = calc_metric_pdt(
        name="metric",
        output=wf.fit_clf.lzout.output,
        metrics=wf.lzin.metrics,
    )
    wf.add(scorer)
    wf.metric.combine("fit_clf.split_index")

    # shap: uses the fitted model, then gathered across splits.
    explainer = get_shap_pdt(
        name="shap",
        X=wf.readcsv.lzout.X,
        permute=wf.lzin.permute,
        model=wf.fit_clf.lzout.model,
        gen_shap=wf.lzin.gen_shap,
        nsamples=wf.lzin.nsamples,
        l1_reg=wf.lzin.l1_reg,
    )
    wf.add(explainer)
    wf.shap.combine("fit_clf.split_index")

    workflow_outputs = [
        ("output", wf.metric.lzout.output),
        ("score", wf.metric.lzout.score),
        ("shaps", wf.shap.lzout.shaps),
        ("feature_names", wf.readcsv.lzout.feature_names),
    ]
    wf.set_output(workflow_outputs)
    return wf

Exemple #4

0

Afficher le fichier

Fichier : pydra_incrementation.py Projet : glatard/sample-pipelines

                        help="Name of the experiment being performed")
    parser.add_argument("iterations", type=int, help="number of iterations")
    parser.add_argument("delay",
                        type=float,
                        help="sleep delay during "
                        "incrementation")
    parser.add_argument("--benchmark",
                        action="store_true",
                        help="benchmark results")

    args_ = parser.parse_args()
    paths = crawl_dir(os.path.abspath(args_.bb_dir))

    wf = pydra.Workflow(name="pydra-incrementation",
                        input_spec=["f", "start", "args", "it"],
                        start=start,
                        args=args_,
                        cache_dir=args_.output_dir,
                        it=0)

    print("Output directory", wf.output_dir)

    increment = pydra.mark.task(increment)

    wf.split("f", f=paths)

    func_name = "increment{}".format(0)
    wf.add(
        increment(name=func_name,
                  filename=wf.lzin.f,
                  start=wf.lzin.start,
                  args=wf.lzin.args,

Exemple #5

0

Afficher le fichier

        "formatted according to the BIDS standard.",
    )
    parser.add_argument(
        "output_dir",
        help="The directory where the output files "
        "should be stored. If you are running group level analysis "
        "this folder should be prepopulated with the results of the"
        "participant level analysis.",
    )
    args = parser.parse_args()

    subject_dirs = glob(os.path.join(args.bids_dir, "sub-*"))
    subjects_to_analyze = [subject_dir.split("-")[-1] for subject_dir in subject_dirs]

    wf = pydra.Workflow(
        name="BIDS App Example with Boutiques",
        input_spec=["infile", "maskfile"]
    )

    T1_files = [
        os.path.abspath(T1_file)
        for subject_label in subjects_to_analyze
        for T1_file in glob(
            os.path.join(args.bids_dir, "sub-%s" % subject_label, "anat", "*_T1w.nii*")
        )
        + glob(
            os.path.join(
                args.bids_dir, "sub-%s" % subject_label, "ses-*", "anat", "*_T1w.nii*"
            )
        )
    ]

Exemple #6

0

Afficher le fichier

    def _run_pydra(self, image, soft_ver_str):
        """Build and execute the three-task pydra workflow for one image.

        The workflow chains:

        1. ``run`` - a ``pydra.DockerTask`` executing the command from
           ``self.params["analysis"]`` with ``image`` as its container image;
        2. ``outfiles`` - a function task that collects the ``run`` outputs
           into one entry per element of ``self.params["tests"]``;
        3. ``test`` - a ``pydra.ShellCommandTask`` running ``python`` with the
           test scripts, split over the tests.

        Args:
            image: Value assigned to the workflow's ``image`` input.
            soft_ver_str: Key under which the resulting reports are stored in
                ``self.reports``.

        Raises:
            Exception: If the ``file`` entry of a test is neither a ``str``
                nor a ``list``.

        Side effects:
            Runs the workflow through ``pydra.Submitter(plugin="cf")``, prints
            a progress message, may rewrite ``el["file"]`` entries of
            ``self.params["tests"]`` in place, and stores
            ``res.output.reports`` in ``self.reports[soft_ver_str]``.
        """
        wf = pydra.Workflow(
            name="wf",
            input_spec=["image"],
            cache_dir=self.working_dir,
        )
        wf.inputs.image = image

        # 1st task - analysis
        param_run = self.params["analysis"]
        cmd_run = [param_run["command"]]
        inp_fields_run = []
        inp_val_run = {}

        if param_run["script"]:
            script_run = param_run["script"]
            inp_fields_run.append((
                "script",
                attr.ib(type=pydra.specs.File,
                        metadata={
                            "argstr": "",
                            "position": 1,
                            "help_string": "script file",
                            "mandatory": True,
                        }),
            ))
            inp_val_run["script"] = script_run

        # maps input name -> output file name for inputs flagged "output_file"
        output_file_dict = {}
        for ind, inputs in enumerate(param_run["inputs"]):
            # work on a copy: the pops below must not alter self.params
            inputs = deepcopy(inputs)
            value = inputs.pop("value")
            name = inputs.pop("name", f"inp_{ind}")
            output_file = inputs.pop("output_file", False)
            # default values for metadata
            metadata = {
                "argstr": "",
                # position 1 is used by the optional script field above
                "position": ind + 2,
                "help_string": f"inp_{ind}",
                "mandatory": True,
            }
            tp = inputs.pop("type")
            if tp == "File":
                tp = pydra.specs.File
                metadata["container_path"] = True
            # updating metadata with values provided in parameters file
            metadata.update(inputs)

            field = (name, attr.ib(type=tp, metadata=metadata))
            inp_fields_run.append(field)

            if tp is pydra.specs.File:
                # the task binds self.data_path to /data (see bindings below)
                inp_val_run[name] = f"/data/{value}"
                # NOTE(review): process_path_obj is defined elsewhere; its
                # effect on `value` / self.data_path is not visible here.
                process_path_obj(value, self.data_path)
            else:
                if output_file:
                    output_file_dict[name] = value
                    value = os.path.join("/output_pydra", value)
                inp_val_run[name] = value

        input_spec_run = pydra.specs.SpecInfo(name="Input",
                                              fields=inp_fields_run,
                                              bases=(pydra.specs.DockerSpec, ))

        out_fields_run = []
        for el in self.params["tests"]:
            if isinstance(el["file"], str):
                if el["file"] in output_file_dict:
                    # Rewritten in place: the later loops over
                    # self.params["tests"] read this updated value.
                    el["file"] = output_file_dict[el["file"]]
                out_fields_run.append(
                    (f"file_{el['name']}", pydra.specs.File, el["file"]))
            elif isinstance(el["file"], list):
                for ii, file in enumerate(el["file"]):
                    out_fields_run.append(
                        (f"file_{el['name']}_{ii}", pydra.specs.File, file))
            else:
                raise Exception(
                    "value for file in params['tests'] has to be a str or a list"
                )

        output_spec_run = pydra.specs.SpecInfo(
            name="Output",
            fields=out_fields_run,
            bases=(pydra.specs.ShellOutSpec, ))

        task_run = pydra.DockerTask(
            name="run",
            executable=cmd_run,
            image=wf.lzin.image,
            input_spec=input_spec_run,
            output_spec=output_spec_run,
            bindings=[(self.data_path, "/data", "ro")],
            **inp_val_run,
        )
        wf.add(task_run)

        # 2nd task - creating list from the 1st task output
        @pydra.mark.task
        @pydra.mark.annotate({"return": {"outfiles": list}})
        def outfiles_list(res):
            # One entry per test: a tuple of files when the test lists
            # several files, a single file otherwise. Keys mirror the output
            # field names declared in out_fields_run above.
            out_f = []
            for el in self.params["tests"]:
                if isinstance(el["file"], (tuple, list)):
                    out_f.append(
                        tuple(res[f"file_{el['name']}_{i}"]
                              for i in range(len(el["file"]))))
                else:
                    out_f.append(res[f"file_{el['name']}"])
            return out_f

        wf.add(outfiles_list(name="outfiles", res=wf.run.lzout.all_))

        # 3rd task - tests
        input_spec_test = pydra.specs.SpecInfo(
            name="Input",
            fields=[
                (
                    "script_test",
                    attr.ib(type=pydra.specs.File,
                            metadata={
                                "argstr": "",
                                "position": 1,
                                "help_string": "test file",
                                "mandatory": True,
                            }),
                ),
                (
                    "file_out",
                    attr.ib(type=(tuple, pydra.specs.File),
                            metadata={
                                "position": 2,
                                "help_string": "out file",
                                "argstr": "-out",
                                "mandatory": True,
                            }),
                ),
                (
                    "file_ref",
                    attr.ib(type=(tuple, pydra.specs.File),
                            metadata={
                                "position": 3,
                                "argstr": "-ref",
                                "help_string": "out file",
                                "mandatory": True,
                                "container_path": True,
                            }),
                ),
                (
                    "name_test",
                    attr.ib(type=str,
                            metadata={
                                "position": 4,
                                "argstr": "-name",
                                "help_string": "test name",
                                "mandatory": True,
                            }),
                ),
            ],
            bases=(pydra.specs.ShellSpec, ),
        )

        output_spec_test = pydra.specs.SpecInfo(
            name="Output",
            fields=[("reports", pydra.specs.File, "report_*.json")],
            bases=(pydra.specs.ShellOutSpec, ),
        )

        # With a test image the reference data is bound to /data_ref inside
        # the container; otherwise the reference path is used directly.
        if self.test_image:
            container_info = ("docker", self.test_image, [(self.data_ref_path,
                                                           "/data_ref", "ro")])
            file_ref_dir = Path("/data_ref")
        else:
            container_info = None
            file_ref_dir = self.data_ref_path

        inp_val_test = {}
        inp_val_test["name_test"] = [el["name"] for el in self.params["tests"]]
        inp_val_test["script_test"] = [
            el["script"] for el in self.params["tests"]
        ]
        inp_val_test["file_ref"] = []
        for el in self.params["tests"]:
            if isinstance(el["file"], str):
                inp_val_test["file_ref"].append(file_ref_dir / el["file"])
            elif isinstance(el["file"], list):
                inp_val_test["file_ref"].append(
                    tuple(file_ref_dir / file for file in el["file"]))

        # NOTE(review): nested tuples are pydra's scalar splitter, so the four
        # fields are presumably iterated in lockstep, one element per test.
        task_test = pydra.ShellCommandTask(
            name="test",
            executable="python",
            container_info=container_info,
            input_spec=input_spec_test,
            output_spec=output_spec_test,
            file_out=wf.outfiles.lzout.outfiles,
            **inp_val_test,
        ).split((("script_test", "name_test"), ("file_out", "file_ref")))
        wf.add(task_test)

        # setting wf output
        wf.set_output([
            ("outfiles", wf.outfiles.lzout.outfiles),
            ("test_out", wf.test.lzout.stdout),
            ("reports", wf.test.lzout.reports),
        ])
        print(f"\n running pydra workflow for {self.workflow_path} "
              f"in working directory - {self.working_dir}")
        with pydra.Submitter(plugin="cf") as sub:
            sub(wf)
        res = wf.result()
        self.reports[soft_ver_str] = res.output.reports

Exemple #7

0

Afficher le fichier

Fichier : 4_intro_workflow.py Projet : PeerHerholz/pydra-tutorial

    return a**n

@pydra.mark.task
def mult_var(a, b):
    """Return the product ``a * b``; the decorator wraps this as a pydra task."""
    return a * b


# -

# ## Introduction to Workflow
#
# In order to run multiple tasks within one pipeline, we use another *pydra* class - `Workflow`. The workflow will contain an arbitrary number of tasks that will be treated as a graph.
#
# Let's start from a workflow with a single task that has one input `x`. When we create a `Workflow`, we have to specify `input_spec` that contains all of the workflow inputs:

wf1 = pydra.Workflow(name="wf1", input_spec=["x"], x=3)

# Now we can add a task and specify that `x` will be taken from the workflow input by using a so-called *Lazy Input*, `x=wf1.lzin.x`. We should also give a `name` to the task we are using in the `Workflow`.

wf1.add(add_two(name="sum", x=wf1.lzin.x))

# Now, we can access the task by using the task name:

wf1.sum

# We also have to specify the workflow output. For this one-task workflow, we simply take the output of `sum` and use a *Lazy Output* to set it to `wf.output.out`:

wf1.set_output([("out", wf1.sum.lzout.out)])

# We could also use a dictionary to set the output - `wf1.set_output({"out": wf1.sum.lzout.out})`, or a tuple if we set a single element: `wf1.set_output(("out", wf1.sum.lzout.out))`

Exemple #8

0

Afficher le fichier

Fichier : pydra_bidsapp_example.py Projet : glatard/sample-pipelines

        "output_dir",
        help="The directory where the output files "
        "should be stored. If you are running group level analysis "
        "this folder should be prepopulated with the results of the"
        "participant level analysis.",
    )
    args = parser.parse_args()

    subject_dirs = glob(os.path.join(args.bids_dir, "sub-*"))
    subjects_to_analyze = [
        subject_dir.split("-")[-1] for subject_dir in subject_dirs
    ]

    wf = pydra.Workflow(
        name="BIDS App Example with Boutiques",
        input_spec=["T1_file", "output_dir"],
        output_dir=args.output_dir,
    )

    @pydra.mark.task
    def fsl_bet_boutiques(T1_file, output_dir):
        maskfile = os.path.join(
            output_dir,
            (os.path.split(T1_file)[-1].replace("_T1w", "_brain").replace(
                ".gz", "").replace(".nii", "")),
        )
        fsl_bet = function("zenodo.3267250")
        ret = fsl_bet(
            "-v{0}:{0}".format(T1_file.split('sub-')[0]),
            "-v{0}:{0}".format(output_dir),
            infile=T1_file,