Example #1
def create_workflow(inp, out, names=None, verbose=False):
    if not osp.isfile(inp):
        raise Exception('File not found %s' % inp)

    # Read one command per line and split it into its arguments.
    with open(inp) as f:
        commands = [line.rstrip('\n').split(' ') for line in f]
    if verbose:
        print(commands)
    names = ['job_%s' % i for i in range(len(commands))] if names is None else names
    jobs = [Job(command=cmd, name=name) for cmd, name in zip(commands, names)]
    workflow = Workflow(jobs=jobs, dependencies=[])
    Helper.serialize(out, workflow)
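The input file read by create_workflow is plain text with one command per line, split on single spaces. A minimal usage sketch follows; the file names are placeholders, not taken from the original source:

# commands.txt (hypothetical) could contain, one shell command per line:
#   python preprocess.py --subject sub-01
#   python preprocess.py --subject sub-02
create_workflow('commands.txt', 'commands.somawf', verbose=True)
# 'commands.somawf' can then be opened and submitted in soma_workflow_gui.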
Example #2
def create_workflow(inp, out, names=None, verbose=False):
    if not osp.isfile(inp):
        raise Exception('File not found %s' % inp)

    with open(inp) as f:
        commands = [line.rstrip('\n').split(' ') for line in f]
    if verbose:
        print(commands)
    names = ['job_%s' % i
             for i in range(len(commands))] if names is None else names
    jobs = [Job(command=cmd, name=name) for cmd, name in zip(commands, names)]
    workflow = Workflow(jobs=jobs, dependencies=[])
    Helper.serialize(out, workflow)
Example #3
 def export_to_gui(self, soma_workflow_dirpath, **Xy):
     '''
     Example
     -------
     see the directory of "examples/run_somaworkflow_gui.py" in epac
     '''
     try:
         from soma_workflow.client import Job, Workflow
         from soma_workflow.client import Helper, FileTransfer
     except ImportError:
          errmsg = "soma-workflow was not found. "\
              "Please verify your soma-workflow "\
              "installation on this computer (e.g. PYTHONPATH).\n"
         sys.stderr.write(errmsg)
         sys.stdout.write(errmsg)
         raise NoSomaWFError
     if not os.path.exists(soma_workflow_dirpath):
         os.makedirs(soma_workflow_dirpath)
     tmp_work_dir_path = soma_workflow_dirpath
     cur_work_dir = os.getcwd()
     os.chdir(tmp_work_dir_path)
     ft_working_directory = FileTransfer(is_input=True,
                                         client_path=tmp_work_dir_path,
                                         name="working directory")
     ## Save the database and tree to working directory
     ## ===============================================
     #        np.savez(os.path.join(tmp_work_dir_path,
     #                 SomaWorkflowEngine.dataset_relative_path), **Xy)
     db_size = estimate_dataset_size(**Xy)
      db_size = int(db_size / (1024 * 1024))  # convert to megabytes
     save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
     store = StoreFs(dirpath=os.path.join(
         tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
     self.tree_root.save_tree(store=store)
     ## Subtree job allocation on disk
     ## ==============================
     node_input = NodesInput(self.tree_root.get_key())
     split_node_input = SplitNodesInput(self.tree_root,
                                        num_processes=self.num_processes)
     nodesinput_list = split_node_input.split(node_input)
     keysfile_list = self._save_job_list(tmp_work_dir_path, nodesinput_list)
     ## Build soma-workflow
     ## ===================
     jobs = self._create_jobs(keysfile_list,
                              is_run_local=False,
                              ft_working_directory=ft_working_directory)
     soma_workflow = Workflow(jobs=jobs)
     if soma_workflow_dirpath and soma_workflow_dirpath != "":
         out_soma_workflow_file = os.path.join(
             soma_workflow_dirpath,
             SomaWorkflowEngine.open_me_by_soma_workflow_gui)
         Helper.serialize(out_soma_workflow_file, soma_workflow)
     os.chdir(cur_work_dir)
Example #4
def create_somaWF(liste_python_files):
    jobs = []
    for file_python in liste_python_files:
        file_name = os.path.basename(file_python)
        job_1 = Job(command=["python", file_python], name=file_name)
        jobs.append(job_1)

    #jobs = [job_1]
    dependencies = []
    workflow = Workflow(jobs=jobs, dependencies=dependencies)

    # save the workflow into a file
    somaWF_name = os.path.join(path_script, "soma_WF_JOBS")
    Helper.serialize(somaWF_name, workflow)
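A possible call pattern for create_somaWF, assuming path_script is already defined in the enclosing script; the file names below are illustrative:

# Hypothetical list of job scripts; any paths to existing .py files would do.
liste_python_files = ["jobs/preproc_subject01.py", "jobs/preproc_subject02.py"]
create_somaWF(liste_python_files)
# This writes the file <path_script>/soma_WF_JOBS, ready to open in soma_workflow_gui.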
Example #5
 def export_to_gui(self, soma_workflow_dirpath, **Xy):
     """
     Example
     -------
     see the directory of "examples/run_somaworkflow_gui.py" in epac
     """
     try:
         from soma_workflow.client import Job, Workflow
         from soma_workflow.client import Helper, FileTransfer
     except ImportError:
          errmsg = (
              "soma-workflow was not found. Please verify your soma-workflow "
              "installation on this computer (e.g. PYTHONPATH).\n"
          )
         sys.stderr.write(errmsg)
         sys.stdout.write(errmsg)
         raise NoSomaWFError
     if not os.path.exists(soma_workflow_dirpath):
         os.makedirs(soma_workflow_dirpath)
     tmp_work_dir_path = soma_workflow_dirpath
     cur_work_dir = os.getcwd()
     os.chdir(tmp_work_dir_path)
     ft_working_directory = FileTransfer(is_input=True, client_path=tmp_work_dir_path, name="working directory")
     ## Save the database and tree to working directory
     ## ===============================================
     #        np.savez(os.path.join(tmp_work_dir_path,
     #                 SomaWorkflowEngine.dataset_relative_path), **Xy)
     db_size = estimate_dataset_size(**Xy)
      db_size = int(db_size / (1024 * 1024))  # convert to megabytes
     save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
     store = StoreFs(dirpath=os.path.join(tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
     self.tree_root.save_tree(store=store)
     ## Subtree job allocation on disk
     ## ==============================
     node_input = NodesInput(self.tree_root.get_key())
     split_node_input = SplitNodesInput(self.tree_root, num_processes=self.num_processes)
     nodesinput_list = split_node_input.split(node_input)
     keysfile_list = self._save_job_list(tmp_work_dir_path, nodesinput_list)
     ## Build soma-workflow
     ## ===================
     jobs = self._create_jobs(keysfile_list, is_run_local=False, ft_working_directory=ft_working_directory)
     soma_workflow = Workflow(jobs=jobs)
     if soma_workflow_dirpath and soma_workflow_dirpath != "":
         out_soma_workflow_file = os.path.join(
             soma_workflow_dirpath, SomaWorkflowEngine.open_me_by_soma_workflow_gui
         )
         Helper.serialize(out_soma_workflow_file, soma_workflow)
     os.chdir(cur_work_dir)
Example #6
    def test_serialization(self):
        simple_wf_examples = workflow_local.WorkflowExamplesLocal()
        tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
        srp_wf_examples = workflow_shared.WorkflowExamplesShared()
        self.temporaries += [
            simple_wf_examples.output_dir, tr_wf_examples.output_dir,
            srp_wf_examples.output_dir
        ]
        workflows = []
        workflows.append(("multiple", simple_wf_examples.example_multiple()))
        workflows.append(
            ("special_command", simple_wf_examples.example_special_command()))

        workflows.append(
            ("multiple_transfer", tr_wf_examples.example_multiple()))
        workflows.append(("special_command_transfer",
                          tr_wf_examples.example_special_command()))
        workflows.append(
            ("special_transfer", tr_wf_examples.example_special_transfer()))

        workflows.append(("multiple_srp", srp_wf_examples.example_multiple()))
        workflows.append(
            ("special_command_srp", srp_wf_examples.example_special_command()))

        for workflow_name, workflow in workflows:
            print("Testing", workflow_name)

            file_path = tempfile.mkstemp(prefix="json_",
                                         suffix=workflow_name + ".wf")
            os.close(file_path[0])
            file_path = file_path[1]
            Helper.serialize(file_path, workflow)

            new_workflow = Helper.unserialize(file_path)

            self.assertTrue(
                new_workflow.attributs_equal(workflow),
                "Serialization failed for workflow %s" % workflow_name)

            try:
                os.remove(file_path)
            except IOError:
                pass
Example #7
    def test_serialization(self):
        simple_wf_examples = workflow_local.WorkflowExamplesLocal()
        tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
        srp_wf_examples = workflow_shared.WorkflowExamplesShared()
        self.temporaries += [simple_wf_examples.output_dir,
                             tr_wf_examples.output_dir,
                             srp_wf_examples.output_dir]
        workflows = []
        workflows.append(("multiple", simple_wf_examples.example_multiple()))
        workflows.append(("special_command",
                          simple_wf_examples.example_special_command()))

        workflows.append(("multiple_transfer",
                          tr_wf_examples.example_multiple()))
        workflows.append(("special_command_transfer",
                          tr_wf_examples.example_special_command()))
        workflows.append(("special_transfer",
                          tr_wf_examples.example_special_transfer()))

        workflows.append(("multiple_srp", srp_wf_examples.example_multiple()))
        workflows.append(("special_command_srp",
                          srp_wf_examples.example_special_command()))

        for workflow_name, workflow in workflows:
            print("Testing", workflow_name)

            file_path = tempfile.mkstemp(prefix="json_",
                                         suffix=workflow_name + ".wf")
            os.close(file_path[0])
            file_path = file_path[1]
            Helper.serialize(file_path, workflow)

            new_workflow = Helper.unserialize(file_path)

            self.assertTrue(new_workflow.attributs_equal(workflow),
                            "Serialization failed for workflow %s" %
                            workflow_name)

            try:
                os.remove(file_path)
            except IOError:
                pass
Example #8
    def test_serialization(self):
        directory = "/tmp/"

        simple_wf_examples = workflow_local.WorkflowExamplesLocal()
        tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
        srp_wf_examples = workflow_shared.WorkflowExamplesShared()
        workflows = []
        workflows.append(("multiple", simple_wf_examples.example_multiple()))
        workflows.append(("special_command",
                          simple_wf_examples.example_special_command()))

        workflows.append(("multiple_transfer",
                          tr_wf_examples.example_multiple()))
        workflows.append(("special_command_transfer",
                          tr_wf_examples.example_special_command()))
        workflows.append(("special_transfer",
                          tr_wf_examples.example_special_transfer()))

        workflows.append(("multiple_srp", srp_wf_examples.example_multiple()))
        workflows.append(("special_command_srp",
                          srp_wf_examples.example_special_command()))

        for workflow_name, workflow in workflows:
            print("Testing", workflow_name)

            file_path = os.path.join(directory,
                                     "json_" + workflow_name + ".wf")
            Helper.serialize(file_path, workflow)

            new_workflow = Helper.unserialize(file_path)

            self.assertTrue(new_workflow.attributs_equal(workflow),
                            "Serialization failed for workflow %s" %
                            workflow_name)

            try:
                os.remove(file_path)
            except IOError:
                pass
Example #9
    def test_serialization(self):
        directory = "/tmp/"

        simple_wf_examples = workflow_local.WorkflowExamplesLocal()
        tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
        srp_wf_examples = workflow_shared.WorkflowExamplesShared()
        workflows = []
        workflows.append(("multiple", simple_wf_examples.example_multiple()))
        workflows.append(("special_command",
                          simple_wf_examples.example_special_command()))

        workflows.append(("multiple_transfer",
                          tr_wf_examples.example_multiple()))
        workflows.append(("special_command_transfer",
                          tr_wf_examples.example_special_command()))
        workflows.append(("special_transfer",
                          tr_wf_examples.example_special_transfer()))

        workflows.append(("multiple_srp", srp_wf_examples.example_multiple()))
        workflows.append(("special_command_srp",
                          srp_wf_examples.example_special_command()))

        for workflow_name, workflow in workflows:
            print("Testing", workflow_name)

            file_path = os.path.join(directory,
                                     "json_" + workflow_name + ".wf")
            Helper.serialize(file_path, workflow)

            new_workflow = Helper.unserialize(file_path)

            self.assertTrue(new_workflow.attributs_equal(workflow),
                            "Serialization failed for workflow %s" %
                            workflow_name)

            try:
                os.remove(file_path)
            except IOError:
                pass
Example #10
def save_wf(wf, output_file, mode="soma-workflow"):
    """Save the workflow to a file.

    Supports a simple JSON command list ("cmd-list") or the soma-workflow
    format.

    Parameters
    ----------
    wf : tuple (cmd-dict, dependencies)
        Workflow to save.
    output_file : str
        Filename for the workflow.
    mode : str in ["soma-workflow", "cmd-list"],
           optional (default="soma-workflow")
        Format in which to save the workflow.
    """
    cmd = wf[0]
    dep_orig = wf[1]
    if mode == "soma-workflow":
        from soma_workflow.client import Job, Workflow, Helper
        for k, v in cmd.items():
            cmd[k] = Job(command=v, name=k)
        dep = [(cmd[a], cmd[b]) for a, b in dep_orig]
        # jobs sorted by job name (matches the original key-sorted ordering)
        jobs = [cmd[k] for k in sorted(cmd)]
        workflow = Workflow(jobs=jobs, dependencies=dep)
        Helper.serialize(output_file, workflow)
        return workflow
    elif mode == "cmd-list":
        import json
        for k, v in cmd.items():
            cmd[k] = " ".join(v)
        with open(output_file, 'w') as fd:
            json.dump(dict(cmd=cmd, dep=dep_orig), fd, indent=True)
        return cmd
    else:
        raise ValueError("Invalid workflow mode '{}'".format(mode))
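A hedged usage sketch for save_wf, assuming the (cmd-dict, dependencies) convention above, where keys are job names and values are command argument lists; all names below are made up. Note that in "soma-workflow" mode the cmd dict is modified in place (its values are replaced by Job objects), so a fresh tuple would be needed before calling it again in "cmd-list" mode.

# Toy workflow: two jobs, the second depending on the first.
cmd = {"step_a": ["python", "step_a.py"],
       "step_b": ["python", "step_b.py"]}
dep = [("step_a", "step_b")]

save_wf((cmd, dep), "toy.somawf", mode="soma-workflow")  # file for soma_workflow_gui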
Example #11
        ],
        name="Merging all the R2 scores and distributions together.",
        working_directory=scripts_path)

    jobs.append(job_merge)

    # Plotting the maps
    job_final = Job(command=[
        "python", "create_maps.py", "--input", derivatives_path,
        "--parameters", parameters_path, "--subject", args.subject,
        "--fmri_data", fmri_path
    ],
                    name="Creating the maps.",
                    working_directory=scripts_path)
    jobs.append(job_final)
    dependencies.append((job_merge, job_final))

    workflow = Workflow(jobs=jobs, dependencies=dependencies)

    Helper.serialize(os.path.join(inputs_path, 'delete.somawf'), workflow)

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller = WorkflowController(
        "DSV_cluster_{}".format(login), login,
        password)  #"DSV_cluster_ap259944", login, password

    workflow_id = controller.submit_workflow(workflow=workflow,
                                             name="Ridge - LPP")

    print("Finished !!!")
Example #12
                    listRunPerSubj[s])
            body = body + ", " + str(EEGbadlist[s]) + ", " + ')'

            jobname = subj
            for c in cond:
                jobname = jobname + '_' + str(c)
            ListJobName.append(jobname)

            # write jobs in a dedicated folder
            name_file = os.path.join(
                path_script, ("JOBS_PREPROC/Preproc_STC1" + jobname + ".py"))
            Listfile.append(name_file)
            with open(name_file, 'w') as python_file:
                python_file.write(body)

jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]],
                 name=ListJobName[i],
                 native_specification='-l walltime=4:00:00 -l nodes=1:ppn=8')
    jobs.append(JobVar)

# build and save the workflow once, after all jobs have been created
WfVar = Workflow(jobs=jobs, dependencies=[])
somaWF_name = os.path.join(path_script,
                           "SOMA_WFs/soma_WF_PREPROC_allsub_allcond")
Helper.serialize(somaWF_name, WfVar)

#####################################################################
Example #13
    # use a transparent and complete job name referring to arguments of interest
    tmpname = "_dSPM_" + ListSubj[subj]
    nametag.append(tmpname)

    # write jobs in a dedicated folder
    name_file = os.path.join(path_script,
                             ("JOBS_SOURCE/Source_" + tmpname + ".py"))
    Listfile.append(name_file)
    with open(name_file, "w") as python_file:
        python_file.write(body)

name_file1 = os.path.join(path_script, ("JOBS_SOURCE/source_distTEVT.py"))
with open(name_file1, 'w') as python_file:
    python_file.write(everybody)

jobs = []
for i in range(len(Listfile)):
    #job_1 = Job(command=["python", Listfile[i]], name = nametag[i], native_specification="-l walltime=1:00:00, -l nodes=1:ppn=1")
    job_1 = Job(command=["python", Listfile[i]], name=nametag[i])
    jobs.append(job_1)

# build and save the workflow once, after all jobs have been created
workflow = Workflow(jobs=jobs, dependencies=[])
somaWF_name = os.path.join(path_script, "SOMA_WFs/soma_WF_SOURCE")
Helper.serialize(somaWF_name, workflow)

#####################################################################
Example #14
            dependencies.append((fit, transform))
            inner_fold_jobs.append(transform)  # Just for grouping

            # Predict task
            predict_cmd = predict_command(param_transform_files,
                                          param_prediction_file)
            job_name = common_job_name + "/predict"
            predict = Job(command=predict_cmd, name=job_name)
            jobs.append(predict)
            dependencies.append((transform, predict))
            inner_fold_jobs.append(predict)  # Just for grouping

            # Set dependencies of cleaning job
            dependencies.append((predict, clean_job))
        # End loop on params
        # Group all jobs of this fold in a group
        group_elements.append(
            Group(elements=inner_fold_jobs,
                  name="Outer fold {out}/Inner fold {inn}".format(
                      out=outer_fold_index, inn=inner_fold_index)))
    # End inner loop
# End outer loop

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=group_elements,
                    name=WF_NAME)

# save the workflow into a file
Helper.serialize(os.path.join(OUT_DIR, WF_NAME), workflow)
Example #15
    jobs += group_score + group_significativity + group_merge
    jobs.append(job_final)

    scores = Group(elements=group_score,
                   name="group where test scores are calculated")

    significativity = Group(
        elements=group_significativity,
        name="group where distributions are calculated for significance")

    merge = Group(elements=group_merge, name="group where we merge results")

    workflow = Workflow(jobs=jobs,
                        dependencies=dependencies,
                        root_group=[scores, significativity, merge, job_final])

    Helper.serialize(
        os.path.join(inputs_path, 'optimized_cluster_part_2.somawf'), workflow)

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller = WorkflowController(
        "DSV_cluster_{}".format(login), login,
        password)  #"DSV_cluster_ap259944", login, password

    workflow_id = controller.submit_workflow(workflow=workflow,
                                             name="Cluster optimized part 2")

    print("Finished !!!")
Example #16
    with open(name_file, 'w') as python_file:
        python_file.write(body)

###############################################################################
# create workflow
jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]], name=ListJobName[i],
                 native_specification='-l walltime=4:00:00, -l nodes=1:ppn=2')
    jobs.append(JobVar)

# define dependencies (tuples of two jobs):
# the second job will be executed after the first.
# Here, plot the grand average after the evoked responses have been written
# for each subject.
n = len(ListSubject)
dependencies = [(jobs[c * n + s + c], jobs[(c + 1) * n + c])
                for s, subject in enumerate(ListSubject)
                for c, condcouple in enumerate(ListCondition)]

# save the workflow into a file
WfVar = Workflow(jobs=jobs, dependencies=dependencies)
somaWF_name = os.path.join(wdir, 'somawf/workflows/DEMO_WF')
Helper.serialize(somaWF_name, WfVar)
Example #17
2. copy all the spm batch files in the directory containing the
following python script; then run

python create_workflow_spm12batches.py

This must generate a file called 'spm12_batches.somawf'

3. Launch soma_workflow_gui on the command line in the same directory;
open the spm12_batches.somawf file and press 'submit'

Note: use your own PC if it has many cores.
"""

import os, sys, glob
from soma_workflow.client import Job, Workflow, Helper

if len(sys.argv) == 1:
    spm12_batches = glob.glob("*.mat")
else:
    spm12_batches = sys.argv[1:]

jobs = []
for b in spm12_batches:
    jobs.append(Job(command=["spm12", "run", os.path.abspath(b)], name=b))

workflow = Workflow(jobs)

Helper.serialize('spm12_batches.somawf', workflow)

print('''Now, you can open 'spm12_batches.somawf' in soma_workflow_gui and submit it''')
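Instead of the GUI submission described in step 3, the serialized file could presumably also be submitted from a script with WorkflowController; a sketch, where the resource name, login and password are placeholders for a resource configured in soma-workflow's client-server mode:

# Sketch: programmatic submission of the serialized SPM12 workflow.
from soma_workflow.client import Helper, WorkflowController

controller = WorkflowController("my_cluster", "my_login", "my_password")  # placeholders
spm_workflow = Helper.unserialize('spm12_batches.somawf')
workflow_id = controller.submit_workflow(workflow=spm_workflow, name="spm12 batches")
Helper.wait_workflow(workflow_id, controller)  # block until all batches finish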
Example #18
            outer = cmd[5]
            key = outer + design
            if key not in dep_dic:
                dep_dic[key] = []
            cur_job = Job(command=cmd,
                          name="Reduce %s outer %s" % (design, outer))
            red_jobs[key] = cur_job
        elif cmd[1].endswith("multiblox_comparison.outer.reducer.R"):
            print("multiblox_comparison.outer.reducer")
            cur_job = Job(command=cmd, name="Final Reduce")
            final_red = cur_job
        elif cmd[1].endswith("coxnet.mapper.R"):
            print("coxnet mapper")
            cur_job = Job(command=cmd, name="Coxnet")
            glmnet_job = cur_job
        else:
            raise Exception("Unknown task, abort...")
        jobs.append(cur_job)
dependencies = []
for k, j in red_jobs.items():
    parent_list = dep_dic[k]
    for p in parent_list:
        dependencies.append((p, j))
    dependencies.append((j, final_red))
dependencies.append((glmnet_job, final_red))
workflow = Workflow(jobs=jobs, dependencies=dependencies)

# save the workflow into a file
Helper.serialize(workflow_file, workflow)
Example #19
2. copy all the spm batch files in the directory containing the
following python script; then run

python create_workflow_spm12batches.py

This must generate a file called 'spm12_batches.somawf'

3. Launch soma_workflow_gui on the command line in the same directory;
open the spm12_batches.somawf file and press 'submit'

Note: use your own PC if it has many cores.
"""

import os, sys, glob
from soma_workflow.client import Job, Workflow, Helper

if len(sys.argv) == 1:
    spm12_batches = glob.glob("*.mat")
else:
    spm12_batches = sys.argv[1:]

jobs = []
for b in spm12_batches:
    jobs.append(Job(command=["spm12", "run", os.path.abspath(b)], name=b))

workflow = Workflow(jobs)

Helper.serialize('spm12_batches.somawf', workflow)

print('''Now, you can open 'spm12_batches.somawf' in soma_workflow_gui and submit it''')
Example #20
        "python", "create_maps.py", "--input", derivatives_path,
        "--parameters", parameters_path, "--subject", args.subject,
        "--fmri_data", fmri_path
    ],
                    name="Creating the maps.",
                    working_directory=scripts_path)
    jobs.append(job_final)
    dependencies.append((job_merge, job_final))

    cv_alphas = Group(elements=group_cv_alphas, name="CV on alphas")
    significativity = Group(elements=group_significativity,
                            name="Fit of the models with best alphas")

    workflow = Workflow(
        jobs=jobs,
        dependencies=dependencies,
        root_group=[job_0, cv_alphas, significativity, job_merge, job_final])

    Helper.serialize(os.path.join(inputs_path, 'cluster_jobs.somawf'),
                     workflow)

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller = WorkflowController(
        "DSV_cluster_{}".format(login), login,
        password)  #"DSV_cluster_ap259944", login, password

    workflow_id = controller.submit_workflow(workflow=workflow,
                                             name="Ridge - LPP")

    print("Finished !!!")
Example #21
    def export(self, script_path):
        try:
            from soma_workflow.client import Job
            from soma_workflow.client import Group
            from soma_workflow.client import Workflow
            from soma_workflow.client import SharedResourcePath
            from soma_workflow.client import Helper
        except ImportError:
            errmsg = "soma-workflow was not found. "\
                "Please verify your soma-workflow "\
                "installation on this computer (e.g. PYTHONPATH).\n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError

        # dataset on remote machine
        dataset_dir = SharedResourcePath(
            relative_path=self.dataset_relative_path,
            namespace=self.namespace,
            uuid=self.uuid)
        # Tree on remote machine
        epac_tree_dir = SharedResourcePath(
            relative_path=self.tree_relative_path,
            namespace=self.namespace,
            uuid=self.uuid)
        # Reduce output on remote machine
        out_dir = SharedResourcePath(relative_path=self.output_relative_path,
                                     namespace=self.namespace,
                                     uuid=self.uuid)
        # workflow file for soma-workflow
        soma_workflow_file = script_path
        # iterate all key jobs
        job_paths = []
        for root, _, files in os.walk(
                os.path.join(self.root, self.jobs_relative_path)):
            for f in files:
                _, ext = os.path.splitext(f)
                if ext == ".job":
                    job_paths.append(f)
        # Building mapper task
        dependencies = []
        map_jobs = []
        for i in range(len(job_paths)):
            job_relative_path = os.path.join(self.jobs_relative_path,
                                             job_paths[i])
            key_path = SharedResourcePath(relative_path=job_relative_path,
                                          namespace=self.namespace,
                                          uuid=self.uuid)
            map_cmd = []
            map_cmd.append("epac_mapper")
            map_cmd.append("--datasets")
            map_cmd.append(dataset_dir)
            map_cmd.append("--keysfile")
            map_cmd.append(key_path)
            map_cmd.append("--treedir")
            map_cmd.append(epac_tree_dir)
            map_job = Job(command=map_cmd,
                          name="map_step",
                          referenced_input_files=[],
                          referenced_output_files=[])
            map_jobs.append(map_job)
        group_map_jobs = Group(elements=map_jobs, name="all map jobs")
        # Building reduce step
        reduce_cmd = []
        reduce_cmd.append("epac_reducer")
        reduce_cmd.append("--treedir")
        reduce_cmd.append(epac_tree_dir)
        reduce_cmd.append("--outdir")
        reduce_cmd.append(out_dir)
        reduce_job = Job(command=reduce_cmd,
                         name="reduce_step",
                         referenced_input_files=[],
                         referenced_output_files=[])
        for map_job in map_jobs:
            dependencies.append((map_job, reduce_job))
        jobs = map_jobs + [reduce_job]
        # Build workflow and save into disk
        workflow = Workflow(jobs=jobs,
                            dependencies=dependencies,
                            root_group=[group_map_jobs, reduce_job])
        Helper.serialize(soma_workflow_file, workflow)
Example #22
    def export(self, workflow_dir, num_processes):
        '''
        Parameters
        ----------
        workflow_dir: string
            the directory to export workflow
        num_processes: integer
            the number of processes you want to run
        '''
        try:
            from soma_workflow.client import Job
            from soma_workflow.client import Group
            from soma_workflow.client import Workflow
            from soma_workflow.client import Helper
        except ImportError:
            errmsg = "soma-workflow was not found. "\
                "Please verify your soma-workflow "\
                "installation on this computer (e.g. PYTHONPATH).\n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError

        self.workflow_dir = workflow_dir
        soma_workflow_file = os.path.join(self.workflow_dir, "soma_workflow")
        if not os.path.exists(self.workflow_dir):
            os.makedirs(self.workflow_dir)
        tree_root = load_tree(self.epac_tree_dir_path)
        keysfile_list = export_jobs(tree_root, num_processes, workflow_dir)
        # Building mapper task
        dependencies = []
        map_jobs = []
        for i in range(len(keysfile_list)):
            key_path = os.path.join(workflow_dir, keysfile_list[i])
            map_cmd = []
            map_cmd.append("epac_mapper")
            map_cmd.append("--datasets")
            map_cmd.append(self.dataset_dir_path)
            map_cmd.append("--keysfile")
            map_cmd.append(key_path)
            map_cmd.append("--treedir")
            map_cmd.append(self.epac_tree_dir_path)
            map_job = Job(command=map_cmd,
                          name="map_step",
                          referenced_input_files=[],
                          referenced_output_files=[])
            map_jobs.append(map_job)
        group_map_jobs = Group(elements=map_jobs, name="all map jobs")
        # Building reduce task
        reduce_cmd = []
        reduce_cmd.append("epac_reducer")
        reduce_cmd.append("--treedir")
        reduce_cmd.append(self.epac_tree_dir_path)
        reduce_cmd.append("--outdir")
        reduce_cmd.append(self.out_dir_path)
        reduce_job = Job(command=reduce_cmd,
                         name="reduce_step",
                         referenced_input_files=[],
                         referenced_output_files=[])
        for map_job in map_jobs:
            dependencies.append((map_job, reduce_job))
        jobs = map_jobs + [reduce_job]
        # Build workflow and save into disk
        workflow = Workflow(jobs=jobs,
                            dependencies=dependencies,
                            root_group=[group_map_jobs, reduce_job])
        Helper.serialize(soma_workflow_file, workflow)