Example #1
0
def create_workflow(inp, out, names=None, verbose=False):
    if not osp.isfile(inp):
        raise Exception('File not found: %s' % inp)

    with open(inp) as f:
        commands = [line.rstrip('\n').split(' ') for line in f]
    if verbose:
        print(commands)
    names = ['job_%s' % i for i in range(len(commands))] if names is None else names
    jobs = [Job(command=cmd, name=name) for cmd, name in zip(commands, names)]
    workflow = Workflow(jobs=jobs, dependencies=[])
    Helper.serialize(out, workflow)
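A minimal, hypothetical usage of create_workflow() above; 'commands.txt' and 'jobs.somawf' are placeholder names. Each line of the input file is one whitespace-separated command, matching the split(' ') parsing in the function.

# Hypothetical usage sketch (placeholder file names):
#   commands.txt holds one command per line, e.g.
#     python process.py subject01
#     python process.py subject02
create_workflow('commands.txt', 'jobs.somawf', verbose=True)
# 'jobs.somawf' can then be opened and submitted in soma_workflow_gui.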
Example #2
0
    def _workflow_stop(self):
        self._workflow_controller.stop_workflow(self._workflow_id)

        # transfer back files, if any
        Helper.transfer_output_files(self._workflow_id,
                                     self._workflow_controller)

        interrupted_step_ids = self._get_interrupted_step_ids()
        for subject_id, step_ids in six.iteritems(interrupted_step_ids):
            if step_ids:
                analysis = self._study.analyses[subject_id]
                analysis.clear_results(step_ids)
Example #3
0
    def run(self, subject_ids=ALL_SUBJECTS):
        self._setup_soma_workflow_controller()
        self._init_internal_parameters()
        if self._workflow_controller.scheduler_config:
            # in local mode only
            cpus_number = self._cpus_number()
            self._workflow_controller.scheduler_config.set_proc_nb(cpus_number)
        if subject_ids == ALL_SUBJECTS:
            subject_ids = self._study.subjects
        # setup shared path in study_config
        study_config = self._study
        swf_resource = study_config.somaworkflow_computing_resource
        if not self._workflow_controller.scheduler_config:
            # remote config only
            # FIXME: must check if brainvisa shared dir is known in translation
            # config in soma-workflow
            if not study_config.somaworkflow_computing_resources_config.trait(
                    swf_resource):
                setattr(study_config.somaworkflow_computing_resources_config,
                        swf_resource, {})
            resource_conf = getattr(
                study_config.somaworkflow_computing_resources_config,
                swf_resource)
            path_translations = resource_conf.path_translations
            setattr(path_translations, study_config.shared_directory,
                    ['brainvisa', 'de25977f-abf5-9f1c-4384-2585338cd7af'])

        #self._check_input_files(subject_ids)
        workflow = self._create_workflow(subject_ids)
        jobs = [j for j in workflow.jobs if isinstance(j, Job)]
        if self._workflow_id is not None:
            self._workflow_controller.delete_workflow(self._workflow_id)
        if len(jobs) == 0:
            # empty workflow: nothing to do
            self._workflow_id = None
            return
        self._workflow_id = self._workflow_controller.submit_workflow(
            workflow, name=workflow.name)
        self._build_jobid_to_step()

        # run transfers, if any
        Helper.transfer_input_files(self._workflow_id,
                                    self._workflow_controller)
        # the status does not change immediately after run,
        # so we wait for the status WORKFLOW_IN_PROGRESS or timeout
        status = self._workflow_controller.workflow_status(self._workflow_id)
        try_count = 8
        while status != sw.constants.WORKFLOW_IN_PROGRESS and try_count > 0:
            time.sleep(0.25)
            status = self._workflow_controller.workflow_status(
                self._workflow_id)
            try_count -= 1
Example #4
0
 def wait(self, subject_id=None, step_id=None):
     if subject_id is None and step_id is None:
         Helper.wait_workflow(
             self._workflow_id, self._workflow_controller)
     elif subject_id is not None:
         if step_id is None:
             raise NotImplementedError
         else:
             self._step_wait(subject_id, step_id)
     else:
         raise NotImplementedError
     # transfer back files, if any
     Helper.transfer_output_files(self._workflow_id,
                                  self._workflow_controller)
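A hedged usage sketch of the wait() method above; 'runner', the subject id and the step id are placeholders. With no arguments it waits for the whole workflow; a (subject_id, step_id) pair waits for a single step; any other combination raises NotImplementedError.

# Hypothetical usage sketch; 'runner' is an instance of the enclosing class
# and the identifiers are placeholders.
runner.wait()                                        # wait for the whole workflow
runner.wait(subject_id='subj01', step_id='step_1')   # wait for one subject's step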
Example #5
0
 def export_to_gui(self, soma_workflow_dirpath, **Xy):
     """
     Example
     -------
     see "examples/run_somaworkflow_gui.py" in the epac source
     """
     try:
         from soma_workflow.client import Job, Workflow
         from soma_workflow.client import Helper, FileTransfer
     except ImportError:
         errmsg = (
             "No soma-workflow is found. " "Please verify your soma-worklow" "on your computer (e.g. PYTHONPATH) \n"
         )
         sys.stderr.write(errmsg)
         sys.stdout.write(errmsg)
         raise NoSomaWFError
     if not os.path.exists(soma_workflow_dirpath):
         os.makedirs(soma_workflow_dirpath)
     tmp_work_dir_path = soma_workflow_dirpath
     cur_work_dir = os.getcwd()
     os.chdir(tmp_work_dir_path)
     ft_working_directory = FileTransfer(is_input=True, client_path=tmp_work_dir_path, name="working directory")
     ## Save the database and tree to working directory
     ## ===============================================
     #        np.savez(os.path.join(tmp_work_dir_path,
     #                 SomaWorkflowEngine.dataset_relative_path), **Xy)
     db_size = estimate_dataset_size(**Xy)
     db_size = int(db_size / (1024 * 1024))  # convert it into mega byte
     save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
     store = StoreFs(dirpath=os.path.join(tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
     self.tree_root.save_tree(store=store)
     ## Subtree job allocation on disk
     ## ==============================
     node_input = NodesInput(self.tree_root.get_key())
     split_node_input = SplitNodesInput(self.tree_root, num_processes=self.num_processes)
     nodesinput_list = split_node_input.split(node_input)
     keysfile_list = self._save_job_list(tmp_work_dir_path, nodesinput_list)
     ## Build soma-workflow
     ## ===================
     jobs = self._create_jobs(keysfile_list, is_run_local=False, ft_working_directory=ft_working_directory)
     soma_workflow = Workflow(jobs=jobs)
     if soma_workflow_dirpath and soma_workflow_dirpath != "":
         out_soma_workflow_file = os.path.join(
             soma_workflow_dirpath, SomaWorkflowEngine.open_me_by_soma_workflow_gui
         )
         Helper.serialize(out_soma_workflow_file, soma_workflow)
     os.chdir(cur_work_dir)
Example #6
0
    def test_serialization(self):
        simple_wf_examples = workflow_local.WorkflowExamplesLocal()
        tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
        srp_wf_examples = workflow_shared.WorkflowExamplesShared()
        self.temporaries += [simple_wf_examples.output_dir,
                             tr_wf_examples.output_dir,
                             srp_wf_examples.output_dir]
        workflows = []
        workflows.append(("multiple", simple_wf_examples.example_multiple()))
        workflows.append(("special_command",
                          simple_wf_examples.example_special_command()))

        workflows.append(("mutiple_transfer",
                          tr_wf_examples.example_multiple()))
        workflows.append(("special_command_transfer",
                          tr_wf_examples.example_special_command()))
        workflows.append(("special_transfer",
                          tr_wf_examples.example_special_transfer()))

        workflows.append(("mutiple_srp", srp_wf_examples.example_multiple()))
        workflows.append(("special_command_srp",
                          srp_wf_examples.example_special_command()))

        for workflow_name, workflow in workflows:
            print("Testing", workflow_name)

            file_path = tempfile.mkstemp(prefix="json_",
                                         suffix=workflow_name + ".wf")
            os.close(file_path[0])
            file_path = file_path[1]
            Helper.serialize(file_path, workflow)

            new_workflow = Helper.unserialize(file_path)

            self.assertTrue(new_workflow.attributs_equal(workflow),
                            "Serialization failed for workflow %s" %
                            workflow_name)

            try:
                os.remove(file_path)
            except IOError:
                pass
Example #7
0
    def test_serialization(self):
        directory = "/tmp/"

        simple_wf_examples = workflow_local.WorkflowExamplesLocal()
        tr_wf_examples = workflow_transfer.WorkflowExamplesTransfer()
        srp_wf_examples = workflow_shared.WorkflowExamplesShared()
        workflows = []
        workflows.append(("multiple", simple_wf_examples.example_multiple()))
        workflows.append(("special_command",
                          simple_wf_examples.example_special_command()))

        workflows.append(("mutiple_transfer",
                          tr_wf_examples.example_multiple()))
        workflows.append(("special_command_transfer",
                          tr_wf_examples.example_special_command()))
        workflows.append(("special_transfer",
                          tr_wf_examples.example_special_transfer()))

        workflows.append(("mutiple_srp", srp_wf_examples.example_multiple()))
        workflows.append(("special_command_srp",
                          srp_wf_examples.example_special_command()))

        for workflow_name, workflow in workflows:
            print "Testing", workflow_name

            file_path = os.path.join(directory,
                                     "json_" + workflow_name + ".wf")
            Helper.serialize(file_path, workflow)

            new_workflow = Helper.unserialize(file_path)

            self.assertTrue(new_workflow.attributs_equal(workflow),
                            "Serialization failed for workflow %s" %
                            workflow_name)

            try:
                os.remove(file_path)
            except IOError:
                pass
Example #8
0
def save_wf(wf, output_file, mode="soma-workflow"):
    """Save the workflow in a file.

    Supports a simple JSON command list (cmd-list) or soma-workflow format.

    Parameters
    ----------
    wf : tuple (cmd-dict, dependencies)
        Workflow to save.
    output_file : str
        Filename for the workflow.
    mode : str in ["soma-workflow", "cmd-list"], optional (default="soma-workflow")
        Format in which to save the workflow.
    """
    cmd = wf[0]
    dep_orig = wf[1]
    if mode == "soma-workflow":
        from soma_workflow.client import Job, Workflow, Helper
        for k, v in cmd.items():
            cmd[k] = Job(command=v, name=k)
        dep = [(cmd[a], cmd[b]) for a, b in dep_orig]
        jobs = [cmd[k] for k in sorted(cmd)]
        workflow = Workflow(jobs=jobs, dependencies=dep)
        Helper.serialize(output_file, workflow)
        return workflow
    elif mode == "cmd-list":
        import json
        for k, v in cmd.items():
            cmd[k] = " ".join(v)
        with open(output_file, 'w') as fd:
            json.dump(dict(cmd=cmd, dep=dep_orig), fd, indent=True)
        return cmd
    else:
        raise TypeError("Invalid workflow mode \'{}\'".format(mode))
Example #9
0
 def _get_workflow_status(self):
     sw_status \
         = self._workflow_controller.workflow_status(self._workflow_id)
     if (sw_status in [sw.constants.WORKFLOW_IN_PROGRESS,
                       sw.constants.WORKFLOW_NOT_STARTED]):
         status = Runner.RUNNING
     else:
         has_failed = (len(Helper.list_failed_jobs(
             self._workflow_id, self._workflow_controller,
             include_aborted_jobs=True,
             include_user_killed_jobs=True)) != 0)
         if has_failed:
             status = Runner.FAILED
         else:
             status = Runner.SUCCESS
     return status
Example #10
0
    def test_result(self):
        workflow = self.wf_examples.example_wrong_native_spec_pbs()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id,
                                        self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id,
                                         self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        if self.path_management == self.LOCAL_PATH:
            self.assertTrue(nb_failed_aborted_jobs == 0,
                            "nb failed jobs including aborted : %i. "
                            "Expected : %i" % (nb_failed_aborted_jobs, 0))
        else:
            self.assertTrue(nb_failed_aborted_jobs == 1,
                            "nb failed jobs including aborted : %i. "
                            "Expected : %i" % (nb_failed_aborted_jobs, 1))
Example #11
0
    def run(self, **Xy):
        """Run soma-workflow without gui

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3,
        ...                                 remove_finished_wf=False)
        >>> tree_root_node = sfw_engine.run(**Xy)
        light mode
        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}])

        """
        try:
            from soma_workflow.client import Job, Workflow
            from soma_workflow.client import Helper, FileTransfer
            from soma_workflow.client import WorkflowController
        except ImportError:
            errmsg = (
                "No soma-workflow is found. " "Please verify your soma-worklow" "on your computer (e.g. PYTHONPATH) \n"
            )
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        is_run_local = False
        if not self.resource_id or self.resource_id == "":
            self.resource_id = socket.gethostname()
            is_run_local = True
        # print "is_run_local=", is_run_local
        if not is_run_local:
            ft_working_directory = FileTransfer(is_input=True, client_path=tmp_work_dir_path, name="working directory")
        else:
            ft_working_directory = tmp_work_dir_path

        ## Save the database and tree to working directory
        ## ===============================================
        # np.savez(os.path.join(tmp_work_dir_path,
        # SomaWorkflowEngine.dataset_relative_path), **Xy)
        save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
        store = StoreFs(dirpath=os.path.join(tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)

        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root, num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list)

        ## Build soma-workflow
        ## ===================
        jobs = self._create_jobs(keysfile_list, is_run_local, ft_working_directory)
        soma_workflow = Workflow(jobs=jobs)

        controller = WorkflowController(self.resource_id, self.login, self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow, name="epac workflow", queue=self.queue)
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)

        self.engine_info = self.get_engine_info(controller, wf_id)

        if self.remove_finished_wf:
            controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
            shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
Example #12
0
2. copy all the SPM batch files into the directory containing the
following Python script; then run

python create_workflow_spm12batches.py

This generates a file called 'spm12_batches.somawf'

3. Launch soma_workflow_gui on the command line in the same directory;
open the spm12_batches.somawf file and press 'submit'

Note: you can use your own PC if it has many cores.
"""

import os, sys, glob
from soma_workflow.client import Job, Workflow, Helper

if len(sys.argv) == 1:
    spm12_batches = glob.glob("*.mat")
else:
    spm12_batches = sys.argv[1:]

jobs = []
for b in spm12_batches:
    jobs.append(Job(command=["spm12", "run", os.path.abspath(b)], name=b))

workflow = Workflow(jobs)

Helper.serialize('spm12_batches.somawf', workflow)

print("Now, you can open 'spm12_batches.somawf' in soma_workflow_gui and submit it")
Example #13
0
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):

    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':

        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc_marshal, params_fn,
                             func_fn, output_fn])
            print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['python', '-c', cfunc, param_file, func_file,
                               output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])

        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(
            workflow=workflow, name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
Example #14
0
    def test_result(self):
        workflow = self.wf_examples.example_simple()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))

        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id,
                                                     self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    if job_name == 'job1':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[1])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        msg = "job stderr not empty : cf %s\n" \
                            "stderr:\n---\n%s\n---" \
                            % (job_stderr_file, open(job_stderr_file).read())
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        msg)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.lo_file[11])
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.lo_file[12])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.tr_file[11].client_path)
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.tr_file[12].client_path)
                            self.assertTrue(isSame, msg)
                            # For unknown reason, it raises some errors
                            # http://stackoverflow.com/questions/10496758/unexpected-end-of-file-and-error-importing-function-definition-error-running
                            # isSame,	msg	= identical_files(job_stderr_file,self.wf_examples.lo_stderr[1])
                            # self.failUnless(isSame == True)

                    if job_name in ['job2', 'job3', 'job4']:
                        job_nb = int(job_name[3])
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[job_nb])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        "job stderr not empty : cf %s" %
                                        job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.lo_file[job_nb])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.tr_file[job_nb].client_path)
                            self.assertTrue(isSame, msg)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
            logger.info("epd_to_deploy " + repr(options.epd_to_deploy)) 
            logger.info("untar_directory " + repr(options.untar_directory))
            sch = MPIScheduler(comm, interval=1, nb_attempt_per_job=options.nb_attempt_per_job)

            config.disable_queue_limits()    

            workflow_engine = ConfiguredWorkflowEngine(database_server,
                                                       sch,
                                                       config)
            if options.workflow_file and os.path.exists(options.workflow_file):
                workflow_file = options.workflow_file  
                logger.info(" ")
                logger.info("******* submission of worklfow **********")
                logger.info("workflow file: " + repr(workflow_file))

                workflow = Helper.unserialize(workflow_file)
                workflow_engine.submit_workflow(workflow,
                                                expiration_date=None,
                                                name=None,
                                                queue=None)
            if options.wf_id_to_restart is not None:
                workflow_id = options.wf_id_to_restart
                logger.info(" ")
                logger.info("******* restart worklfow **********")
                logger.info("workflow if: " + repr(workflow_id))
                workflow_engine.stop_workflow(workflow_id)
                workflow_engine.restart_workflow(workflow_id, queue=None)
     
            while not workflow_engine.engine_loop.are_jobs_and_workflow_done():
                time.sleep(2)
            for slave in range(1, comm.size):
Example #16
0
                str(alpha)
            ]
            body = initbody + 'RKD.rank_decode(' + ', '.join(arglist) + ')'

            jobname = ('rank' + "_".join([str(con) for con in cond]) + "_win" +
                       "_".join([str(w)
                                 for w in window]) + '_alpha' + str(alpha))
            ListJobName.append(jobname)

            # write jobs in a dedicated folder
            name_file = os.path.join(path_script,
                                     ("JOBS_RANK/decode_" + jobname + ".py"))
            Listfile.append(name_file)
            with open(name_file, 'w') as python_file:
                python_file.write(body)

###########################################################################
jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]],
                 name=ListJobName[i],
                 native_specification='-l walltime=1:00:00, -l nodes=1:ppn=1')
    jobs.append(JobVar)

WfVar = Workflow(jobs=jobs, dependencies=[])
# save the workflow into a file
somaWF_name = os.path.join(path_script, "SOMA_WFs/soma_wf_rank_decode")
Helper.serialize(somaWF_name, WfVar)
Example #17
0
    def test_result(self):
        nb = 20
        time_sleep = 1

        workflow = self.wf_examples.example_n_jobs(nb=nb, time=time_sleep)
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    # Test stdout
                    self.assertTrue(
                        os.stat(job_stdout_file).st_size == 0,
                        "job stdout not empty : file: %s, "
                        "contents:\n%s" %
                        (job_stdout_file, open(job_stdout_file).read()))
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0,
                        "job stderr not empty : file %s, "
                        "contents:\n%s" %
                        (job_stderr_file, open(job_stderr_file).read()))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #18
0
echo %s
""" % test_bash_script
    fileout.write(filecontent)
    fileout.close()
    os.chdir(cur_work_dir)

    job1 = Job(command=[u"touch", test_filepath],
               name="epac_job_test",
               working_directory=tmp_work_dir_path)
    job2 = Job(command=["%s/readfile" % cur_file_dir, test_bash_script],
               name="epac_job_test",
               working_directory=tmp_work_dir_path)

    soma_workflow = Workflow(jobs=[job1, job2])

    resource_id = socket.gethostname()
    controller = WorkflowController(resource_id, "", "")
    ## run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow")
    Helper.wait_workflow(wf_id, controller)
    nb_failed_jobs = len(Helper.list_failed_jobs(wf_id, controller))
    if nb_failed_jobs > 0:
        raise ValueError("Soma-workflow error, cannot use working directory")

    if not os.path.isfile(os.path.join(tmp_work_dir_path, test_filepath)):
        raise ValueError("Soma-workflow cannot define working directory")
    else:
        print("OK for creating new file in working directory")
Example #19
0
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):

    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':

        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(
                ['python', '-c', cfunc_marshal, params_fn, func_fn, output_fn])
            print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'python', '-c', cfunc, param_file, func_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])

        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
Example #20
0
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster: use somaworkflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2):
    ...     return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs) for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call([
                'python', '-c', cfunc % (fmodule, fname), params_fn, output_fn
            ])
            # print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'pyhrf_exec_pyfunc', fmodule, fname, param_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            if isinstance(o, Exception):
                raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
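Building on the docstring above, a hedged sketch of the 'local' (joblib) mode; it assumes joblib is installed and that ['parallel-local']['nb_procs'] is set in ~/.pyhrf/config.cfg as described.

# Hypothetical sketch of mode='local'; foo is the same toy function as in the
# docstring, and the process count comes from ~/.pyhrf/config.cfg.
from pyhrf.parallel import remote_map

def foo(a, b=2):
    return a + b

results = remote_map(foo, [(2,), (3,)], [{'b': 5}, {'b': 7}], mode='local')
# results == [7, 10]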
Example #21
0
def run_soma_workflow(treatments,
                      exec_cmd,
                      tmp_local_dirs,
                      server_id,
                      remote_host,
                      remote_user,
                      remote_pathes,
                      local_result_pathes,
                      label_for_cluster,
                      wait_ending=False):
    """Dispatch treatments using soma-workflow.

    Parameters
    ----------
    treatments
        dict mapping a treatment name to a treatment object.
    exec_cmd
        command to run on each ROI data set.
    tmp_local_dirs
        dict mapping a treatment name to a local tmp dir (used to store a
        temporary configuration file).
    server_id
        server ID as expected by WorkflowController.
    remote_host
        remote machine where treatments are processed in parallel.
    remote_user
        user name used to log in to remote_host.
    remote_pathes
        dict mapping a treatment name to an existing remote dir used to store
        ROI data and result files.
    local_result_pathes
        dict mapping a treatment name to a local path where final results will
        be stored (the host sends them there by scp).
    label_for_cluster
        base name used to label workflows and sub jobs.
    wait_ending
        if True, wait for the workflow to finish and load the results.
    """

    import getpass
    from socket import gethostname

    local_user = getpass.getuser()
    local_host = gethostname()

    all_nodes = []
    all_deps = []
    all_groups = []
    split_jobs = []
    for t_id, treatment in treatments.iteritems():

        tmp_local_dir = tmp_local_dirs[t_id]
        remote_path = remote_pathes[t_id]
        local_result_path = local_result_pathes[t_id]

        sj, n, d, g = prepare_treatment_jobs(
            treatment, tmp_local_dir, local_result_path, local_user,
            local_host, remote_host, remote_user, remote_path,
            label_for_cluster + '-' + str(t_id))
        all_nodes.extend(n)
        all_deps.extend(d)
        all_groups.append(g)
        split_jobs.append(sj)

    # Jobs for data splitting should be done sequentially.
    # If they're done in parallel, they may flood the remote file system
    for isj in xrange(len(split_jobs)):
        if isj + 1 < len(split_jobs):
            all_deps.append((split_jobs[isj], split_jobs[isj + 1]))

    # # Be sure that all splitting jobs are done first:
    # # Is there a better way ?
    # for n in all_nodes:
    #     for sjob in split_jobs:
    #         all_deps.append((sjob,n))
    # Does not seem to work well -> maybe too many deps?

    workflow = Workflow(all_nodes + split_jobs,
                        all_deps,
                        root_group=all_groups)

    # f = open('/tmp/workflow.pck','w')
    # cPickle.dump(workflow, f)
    # f.close()

    logger.info('Open connection ...')
    connection = WorkflowController(server_id, remote_user)

    logger.info('Submit workflow ...')
    wf_id = connection.submit_workflow(
        workflow=workflow,
        # expiration_date="",
        # queue="run32",
        name=label_for_cluster + '-' + local_user)
    #wf = connection.workflow(wf_id)

    if wait_ending:  # wait for result
        logger.info('Wait for workflow to end and make outputs ...')
        Helper.wait_workflow(wf_id, connection)

        for t_id, local_result_path in local_result_pathes.iteritems():
            treatment = treatments[t_id]
            rfilename = treatment.result_dump_file
            if rfilename is None:
                rfilename = 'result.pck'
            local_result_file = op.join(local_result_path,
                                        op.basename(rfilename))

            if not op.exists(local_result_file):
                raise Exception('Local result does not exist "%s"' %
                                local_result_file)

        if treatment.analyser.outFile is not None:
            # return result only for last treatment ...
            print 'Load result from %s ...' % local_result_file
            if splitext(local_result_file)[1] == '.gz':
                import gzip
                fresult = gzip.open(local_result_file)
            else:
                fresult = open(local_result_file)
            results = cPickle.load(fresult)
            fresult.close()
            # print 'Make outputs ...'
            #treatment.output(results, dump=False)
            logger.info('Cleaning tmp dirs ...')
            for tmp_dir in tmp_local_dirs.itervalues():
                shutil.rmtree(tmp_dir)

            return results
    else:
        logger.info('Cleaning tmp dirs ...')
        for tmp_dir in tmp_local_dirs.itervalues():
            shutil.rmtree(tmp_dir)

        logger.info('Workflow sent, returning ...')
        return []
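A hypothetical call of run_soma_workflow() above; every name, host and path is a placeholder illustrating the one-dict-per-treatment convention from the docstring, and my_treatment stands for a pyhrf treatment object built elsewhere.

# Hypothetical usage sketch; all values below are placeholders.
results = run_soma_workflow(
    treatments={'subj01': my_treatment},          # my_treatment: built elsewhere
    exec_cmd='run_roi_analysis',                  # placeholder command
    tmp_local_dirs={'subj01': '/tmp/swf_subj01'},
    server_id='my_cluster',                       # as expected by WorkflowController
    remote_host='cluster.example.org',
    remote_user='me',
    remote_pathes={'subj01': '/remote/data/subj01'},
    local_result_pathes={'subj01': '/local/results/subj01'},
    label_for_cluster='pyhrf',
    wait_ending=True)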
Example #22
0
            outer = cmd[5]
            key = outer + design
            if key not in dep_dic:
                dep_dic[key] = []
            cur_job = Job(command=cmd,
                          name="Reduce %s outer %s" % (design, outer))
            red_jobs[key] = cur_job
        elif (cmd[1].endswith("multiblox_comparison.outer.reducer.R")):
            print "multiblox_comparison.outer.reducer"
            cur_job = Job(command=cmd, name="Final Reduce")
            final_red = cur_job
        elif (cmd[1].endswith("coxnet.mapper.R")):
            print "coxnet mapper"
            cur_job = Job(command=cmd, name="Coxnet")
            glmnet_job = cur_job
            pass
        else:
            raise Exception("Unknown task, abort...")
        jobs.append(cur_job)
dependencies = []
for k, j in red_jobs.items():
    parent_list = dep_dic[k]
    for p in parent_list:
        dependencies.append((p, j))
    dependencies.append((j, final_red))
dependencies.append((glmnet_job, final_red))
workflow = Workflow(jobs=jobs, dependencies=dependencies)

# save the workflow into a file
Helper.serialize(workflow_file, workflow)
Example #23
0
    def test_result(self):
        workflow = self.wf_examples.example_multiple()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)

        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)

        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)

        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 2,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 2))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 6,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 6))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    if job_name == 'job1':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file, self.wf_examples.lo_stdout[1])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.lo_file[11])
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.lo_file[12])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.tr_file[11].client_path)
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.tr_file[12].client_path)
                            self.assertTrue(isSame, msg)
                            # For an unknown reason, the following check
                            # sometimes raises errors; see
                            # http://stackoverflow.com/questions/10496758/unexpected-end-of-file-and-error-importing-function-definition-error-running
                            # isSame, msg = identical_files(
                            #     job_stderr_file, self.wf_examples.lo_stderr[1])
                            # self.assertTrue(isSame, msg)

                    if job_name in ['job2', 'job3', 'job4']:
                        job_nb = int(job_name[3])
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[job_nb])

                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.lo_file[job_nb])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.tr_file[job_nb].client_path)
                            self.assertTrue(isSame, msg)

                    if job_name in [
                            'job1 with exception', 'job3 with exception'
                    ]:
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_exception_model)
                        self.assertTrue(isSame, msg)
                        # Test stderr
                        with open(job_stderr_file) as f:
                            lines = f.readlines()
                        print(lines)
                        isSame = (lines[-1] == 'Exception: Paf Boum '
                                  'Boum Bada Boum !!!\n')
                        self.assertTrue(isSame,
                                        "Job Exception: %s" % lines[-1])
                finally:
                    if os.path.exists(job_stdout_file):
                        os.unlink(job_stdout_file)
                    if os.path.exists(job_stderr_file):
                        os.unlink(job_stderr_file)
        del self.tested_job
Example #24
0
        "python", "create_maps.py", "--input", derivatives_path,
        "--parameters", parameters_path, "--subject", args.subject,
        "--fmri_data", fmri_path
    ],
                    name="Creating the maps.",
                    working_directory=scripts_path)
    jobs.append(job_final)
    dependencies.append((job_merge, job_final))

    cv_alphas = Group(elements=group_cv_alphas, name="CV on alphas")
    significativity = Group(elements=group_significativity,
                            name="Fit of the models with best alphas")

    workflow = Workflow(
        jobs=jobs,
        dependencies=dependencies,
        root_group=[job_0, cv_alphas, significativity, job_merge, job_final])

    Helper.serialize(os.path.join(inputs_path, 'cluster_jobs.somawf'),
                     workflow)

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller = WorkflowController(
        "DSV_cluster_{}".format(login), login,
        password)  #"DSV_cluster_ap259944", login, password

    workflow_id = controller.submit_workflow(workflow=workflow,
                                             name="Ridge - LPP")

    print("Finished !!!")
Example #25
0
        ],
        name="Merging all the r2 and distribution respectively together.",
        working_directory=scripts_path)

    jobs.append(job_merge)

    # Plotting the maps
    job_final = Job(command=[
        "python", "create_maps.py", "--input", derivatives_path,
        "--parameters", parameters_path, "--subject", args.subject,
        "--fmri_data", fmri_path
    ],
                    name="Creating the maps.",
                    working_directory=scripts_path)
    jobs.append(job_final)
    dependencies.append((job_merge, job_final))

    workflow = Workflow(jobs=jobs, dependencies=dependencies)

    Helper.serialize(os.path.join(inputs_path, 'delete.somawf'), workflow)

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller = WorkflowController(
        "DSV_cluster_{}".format(login), login,
        password)  #"DSV_cluster_ap259944", login, password

    workflow_id = controller.submit_workflow(workflow=workflow,
                                             name="Ridge - LPP")

    print("Finished !!!")
Example #26
0
file1 = FileTransfer(is_input=True,
                     client_path="%s/create_file.py" %
                     Configuration.get_home_dir(),
                     name="script")

file2 = FileTransfer(is_input=True,
                     client_path="%s/output_file" %
                     Configuration.get_home_dir(),
                     name="file created on the server")

# Job and Workflow
run_script = Job(command=["python", file1, file2],
                 name="copy",
                 referenced_input_files=[file1],
                 referenced_output_files=[file2])

workflow = Workflow(jobs=[run_script], dependencies=[])

workflow_id = controller.submit_workflow(workflow=workflow,
                                         name="Simple transfer")

# You may use the gui or manually transfer the files:
manual = True
if manual:
    Helper.transfer_input_files(workflow_id, controller)
    Helper.wait_workflow(workflow_id, controller)
    Helper.transfer_output_files(workflow_id, controller)

# RemoteConnection.kill_remote_servers("Gabriel")
print("Finished !!!")
Example #27
0
    def test_result(self):
        if hasattr(self.wf_ctrl.scheduler_config, 'get_proc_nb'):
            n_iter = 10 * self.wf_ctrl.scheduler_config.get_proc_nb()
        else:
            n_iter = 100
        workflow = self.wf_examples.example_fake_pipelineT1(n_iter)
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id,
                                        self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id,
                                         self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE)
        self.assertTrue(len(Helper.list_failed_jobs(
                        self.wf_id,
                        self.wf_ctrl)) == 0)
        self.assertTrue(len(Helper.list_failed_jobs(
                        self.wf_id,
                        self.wf_ctrl,
                        include_aborted_jobs=True)) == 0)

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    # Test stdout
                    self.assertTrue(os.stat(job_stdout_file).st_size == 0,
                                    "job stdout not empty : cf %s" %
                                    job_stdout_file)
                    # Test no stderr
                    self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                    "job stderr not empty : cf %s" %
                                    job_stderr_file)
                finally:
                    if os.path.exists(job_stdout_file):
                        os.unlink(job_stdout_file)
                    if os.path.exists(job_stderr_file):
                        os.unlink(job_stderr_file)

        del self.tested_job
    def test_result(self):
        workflow = self.wf_examples.example_special_transfer()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)

        # Transfer input files
        Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        status = self.wf_ctrl.workflow_status(self.wf_id)
        # Transfer output files
        Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    if job_name == 'dir_contents':
                        # Test job standard out
                        with open(job_stdout_file, 'r+') as f:
                            dir_contents = f.readlines()
                        dir_path_in = self.wf_examples.lo_in_dir
                        full_path_list = []
                        for element in os.listdir(dir_path_in):
                            full_path_list.append(
                                os.path.join(dir_path_in, element))
                        dir_contents_model = list_contents(full_path_list, [])
                        self.assertTrue(
                            sorted(dir_contents) == sorted(dir_contents_model))
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)

                    if job_name == 'multi file format test':
                        # Test job standard out
                        isSame, msg = identical_files(
                            job_stdout_file, self.wf_examples.lo_mff_stdout)
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #29
0
    def test_result(self):
        workflow = self.wf_examples.example_native_spec_pbs()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow, name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE, "workflow status : %s. Expected : %s" % (status, constants.WORKFLOW_DONE)
        )
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0, "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl, include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" % (nb_failed_aborted_jobs, 0),
        )

        (jobs_info, transfers_info, workflow_status, workflow_queue, tmp_files) = self.wf_ctrl.workflow_elements_status(
            self.wf_id
        )

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_", suffix=repr(job_id), delete=False
                )
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_", suffix=repr(job_id), delete=False
                )
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id, job_stdout_file, job_stderr_file)
                    # Test stdout
                    isSame, msg = identical_files(job_stdout_file, self.wf_examples.lo_stdout[1])
                    self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0, "job stderr not empty : cf %s" % job_stderr_file
                    )
                    # Test output files
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[11], self.wf_examples.lo_file[11]
                        )
                        self.assertTrue(isSame, msg)
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[12], self.wf_examples.lo_file[12]
                        )
                        self.assertTrue(isSame, msg)
                    if self.path_management == self.FILE_TRANSFER or self.path_management == self.SHARED_TRANSFER:
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[11], self.wf_examples.tr_file[11].client_path
                        )
                        self.assertTrue(isSame, msg)
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[12], self.wf_examples.tr_file[12].client_path
                        )
                        self.assertTrue(isSame, msg)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #30
0
            body = body + "'" + Method[M] + "','" + datasource[cc] + "')"

            # use a transparent and complete job name referring to arguments of interest
            tmpname = ""
            for c in range(len(CondComb[cc])):
                tmpname = tmpname + "_" + CondComb[cc][c] + "_" + ListSubj[subj]
            tmpname = tmpname + Method[M] + "_"
            nametag.append(tmpname)

            # write jobs in a dedicated folder
            name_file = []
            name_file = os.path.join(path_script,
                                     ("JOBS_SOURCE/Source_" + tmpname + ".py"))
            Listfile.append(name_file)
            python_file = open(name_file, "w")
            python_file.write(body)
            python_file.close()

jobs = []
for i in range(len(Listfile)):
    #job_1 = Job(command=["python", Listfile[i]], name = nametag[i], native_specification="-l walltime=1:00:00, -l nodes=1:ppn=1")
    job_1 = Job(command=["python", Listfile[i]], name=nametag[i])
    jobs.append(job_1)

workflow = Workflow(jobs=jobs, dependencies=[])

# save the workflow into a file
somaWF_name = os.path.join(path_script, "SOMA_WFs/soma_WF_SOURCE")
Helper.serialize(somaWF_name, workflow)

#####################################################################
Example #31
0
    def test_result(self):
        # Cause all warnings to always be triggered.
        warnings.resetwarnings()
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            workflow = self.wf_examples.example_special_command()
            # Verify some things
            self.assertTrue(len(w) == 1)
            self.assertTrue(issubclass(w[-1].category, UserWarning))
            self.assertTrue(
                "This workflow prints a warning." in str(w[-1].message))

        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        # TODO : sometimes raises an error
        # because status = "workflow_in_progress"

        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))

        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.mkstemp(prefix="job_soma_out_log_",
                                                   suffix=repr(job_id))
                os.close(job_stdout_file[0])
                job_stdout_file = job_stdout_file[1]
                job_stderr_file = tempfile.mkstemp(
                    prefix="job_soma_outerr_log_", suffix=repr(job_id))
                os.close(job_stderr_file[0])
                job_stderr_file = job_stderr_file[1]

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    # Test job stdout
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_local)
                        self.assertTrue(isSame, msg)
                    else:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_remote)
                        self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0,
                        "job stderr not empty : cf %s" % job_stderr_file)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #32
0
    with open(name_file, 'w') as python_file:
        python_file.write(body)  
   
###############################################################################
# create workflow   
jobs = []
for i in range(len(Listfile)):
    JobVar = Job(command=['python', Listfile[i]], name=ListJobName[i],
                 native_specification='-l walltime=4:00:00, -l nodes=1:ppn=2')
    jobs.append(JobVar)

# define dependencies (tuples of two jobs)
# the second job will be executed after the first
# here, plot the grand average after having written the evoked data for each subject
n = len(ListSubject)
dependencies = [(jobs[c*n + s + c], jobs[(c + 1)*n + c])
                for s, subject in enumerate(ListSubject)
                for c, condcouple in enumerate(ListCondition)]
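# Illustration (hypothetical job names, not part of the original script):
# a dependency tuple (job_evoked_subj01_cond1, job_grand_average_cond1)
# means the grand-average job for condition 1 only starts once subject 01's
# evoked-writing job has finished.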
                
# save the workflow into a file    
WfVar = Workflow(jobs=jobs, dependencies=dependencies)
somaWF_name = os.path.join(wdir, 'somawf/workflows/DEMO_WF')
Helper.serialize(somaWF_name, WfVar)

Example #33
0
    jobs += group_score + group_significativity + group_merge
    jobs.append(job_final)

    scores = Group(elements=group_score,
                   name="group where test scores are calculated")

    significativity = Group(
        elements=group_significativity,
        name="group where distributions are calculated for significance")

    merge = Group(elements=group_merge, name="group where we merge results")

    workflow = Workflow(jobs=jobs,
                        dependencies=dependencies,
                        root_group=[scores, significativity, merge, job_final])

    Helper.serialize(
        os.path.join(inputs_path, 'optimized_cluster_part_2.somawf'), workflow)

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller = WorkflowController(
        "DSV_cluster_{}".format(login), login,
        password)  #"DSV_cluster_ap259944", login, password

    workflow_id = controller.submit_workflow(workflow=workflow,
                                             name="Cluster optimized part 2")

    print("Finished !!!")
Example #34
0
    def test_result(self):
        workflow = self.wf_examples.example_simple_exception1()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id,
                                                     self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 1,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 1))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 4,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 4))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name
                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)

                    if job_name == 'job1 with exception':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_exception_model)
                        self.assertTrue(isSame, msg)
                        # Test the last line of stderr
                        with open(job_stderr_file) as f:
                            lines = f.readlines()
                        expected_error = 'Exception: Paf Boum Boum Bada Boum !!!\n'
                        isSame = (lines[-1] == expected_error)
                        self.assertTrue(isSame,
                                        "Job exception : %s. Expected : %s" %
                                        (lines[-1], expected_error))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #35
0
echo %s
""" % test_bash_script
    fileout.write(filecontent)
    fileout.close()
    os.chdir(cur_work_dir)

    job1 = Job(command=[u"touch", test_filepath],
               name="epac_job_test",
               working_directory=tmp_work_dir_path)
    job2 = Job(command=["%s/readfile" % cur_file_dir, test_bash_script],
               name="epac_job_test",
               working_directory=tmp_work_dir_path)

    soma_workflow = Workflow(jobs=[job1, job2])

    resource_id = socket.gethostname()
    controller = WorkflowController(resource_id, "", "")
    ## run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow")
    Helper.wait_workflow(wf_id, controller)
    nb_failed_jobs = len(Helper.list_failed_jobs(wf_id, controller))
    if nb_failed_jobs > 0:
        raise ValueError("Soma-workflow error, cannot use working directory")

    if not os.path.isfile(os.path.join(tmp_work_dir_path, test_filepath)):
        raise ValueError("Soma-workflow cannot define working directory")
    else:
        print "OK for creating new file in working directory"
Example #36
0
def run_soma_workflow(treatments, exec_cmd, tmp_local_dirs, server_id,
                      remote_host, remote_user, remote_pathes,
                      local_result_pathes, label_for_cluster,
                      wait_ending=False):
    """Dispatch treatments using soma-workflow.

    Parameters
    ----------
    treatments
        dict mapping a treatment name to a treatment object
    exec_cmd
        command to run on each ROI data set
    tmp_local_dirs
        dict mapping a treatment name to a local temporary directory (used to
        store a temporary configuration file)
    server_id
        server ID as expected by WorkflowController
    remote_host
        remote machine where the treatments are processed in parallel
    remote_user
        user name used to log in to remote_host
    remote_pathes
        dict mapping a treatment name to an existing remote directory used to
        store ROI data and result files
    local_result_pathes
        dict mapping a treatment name to a local path where the final results
        are stored (the remote host sends them there by scp)
    label_for_cluster
        base name used to label workflows and sub-jobs
    wait_ending
        if True, wait for the workflow to finish and check the local result
        files before returning; otherwise return right after submission
    """

    import getpass
    from socket import gethostname

    local_user = getpass.getuser()
    local_host = gethostname()

    all_nodes = []
    all_deps = []
    all_groups = []
    split_jobs = []
    for t_id, treatment in treatments.iteritems():

        tmp_local_dir = tmp_local_dirs[t_id]
        remote_path = remote_pathes[t_id]
        local_result_path = local_result_pathes[t_id]

        sj, n, d, g = prepare_treatment_jobs(treatment, tmp_local_dir,
                                             local_result_path,
                                             local_user, local_host,
                                             remote_host,
                                             remote_user, remote_path,
                                             label_for_cluster + '-' + str(t_id))
        all_nodes.extend(n)
        all_deps.extend(d)
        all_groups.append(g)
        split_jobs.append(sj)

    # Jobs for data splitting should be done sequentially.
    # If they're done in parallel, they may flood the remote file system
    for isj in xrange(len(split_jobs)):
        if isj + 1 < len(split_jobs):
            all_deps.append((split_jobs[isj], split_jobs[isj + 1]))

    # # Be sure that all splitting jobs are done first:
    # # Is there a better way ?
    # for n in all_nodes:
    #     for sjob in split_jobs:
    #         all_deps.append((sjob,n))
    # Does not seem to work well -> maybe too many deps?

    workflow = Workflow(
        all_nodes + split_jobs, all_deps, root_group=all_groups)

    # f = open('/tmp/workflow.pck','w')
    # cPickle.dump(workflow, f)
    # f.close()

    logger.info('Open connection ...')
    connection = WorkflowController(server_id, remote_user)

    logger.info('Submit workflow ...')
    wf_id = connection.submit_workflow(workflow=workflow,
                                       # expiration_date="",
                                       # queue="run32",
                                       name=label_for_cluster + '-' +
                                       local_user)
    #wf = connection.workflow(wf_id)

    if wait_ending:  # wait for result
        logger.info('Wait for workflow to end and make outputs ...')
        Helper.wait_workflow(wf_id, connection)

        for t_id, local_result_path in local_result_pathes.iteritems():
            treatment = treatments[t_id]
            rfilename = treatment.result_dump_file
            if rfilename is None:
                rfilename = 'result.pck'
            local_result_file = op.join(local_result_path,
                                        op.basename(rfilename))

            if not op.exists(local_result_file):
                raise Exception('Local result does not exist "%s"'
                                % local_result_file)

        if treatment.analyser.outFile is not None:
            # return result only for last treatment ...
            print 'Load result from %s ...' % local_result_file
            if splitext(local_result_file)[1] == '.gz':
                import gzip
                fresult = gzip.open(local_result_file)
            else:
                fresult = open(local_result_file)
            results = cPickle.load(fresult)
            fresult.close()
            # print 'Make outputs ...'
            #treatment.output(results, dump=False)
            logger.info('Cleaning tmp dirs ...')
            for tmp_dir in tmp_local_dirs.itervalues():
                shutil.rmtree(tmp_dir)

            return results
    else:
        logger.info('Cleaning tmp dirs ...')
        for tmp_dir in tmp_local_dirs.itervalues():
            shutil.rmtree(tmp_dir)

        logger.info('Workflow sent, returning ...')
        return []
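# Usage sketch (not part of the original code; every name below is a
# hypothetical placeholder): dispatch a single treatment 'roi_1' to a
# soma-workflow server called 'my_cluster' and wait for its results.
#
# results = run_soma_workflow(
#     treatments={'roi_1': my_treatment},
#     exec_cmd='my_roi_command',
#     tmp_local_dirs={'roi_1': '/tmp/roi_1'},
#     server_id='my_cluster',
#     remote_host='cluster.example.org',
#     remote_user='me',
#     remote_pathes={'roi_1': '/remote/data/roi_1'},
#     local_result_pathes={'roi_1': '/local/results/roi_1'},
#     label_for_cluster='jde',
#     wait_ending=True)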
Example #37
0
    def test_result(self):
        workflow = self.wf_examples.example_native_spec_pbs()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    # Test stdout
                    isSame, msg = identical_files(
                        job_stdout_file, self.wf_examples.lo_stdout[1])
                    self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0,
                        "job stderr not empty : cf %s" % job_stderr_file)
                    # Test output files
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[11],
                            self.wf_examples.lo_file[11])
                        self.assertTrue(isSame, msg)
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[12],
                            self.wf_examples.lo_file[12])
                        self.assertTrue(isSame, msg)
                    if self.path_management == self.FILE_TRANSFER or \
                            self.path_management == self.SHARED_TRANSFER:
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[11],
                            self.wf_examples.tr_file[11].client_path)
                        self.assertTrue(isSame, msg)
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[12],
                            self.wf_examples.tr_file[12].client_path)
                        self.assertTrue(isSame, msg)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #38
0
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster": use soma-workflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2):
    ...     return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs)
                 for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc % (fmodule, fname),
                             params_fn, output_fn])
            # print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['pyhrf_exec_pyfunc', fmodule, fname,
                               param_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(
            workflow=workflow, name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            if isinstance(o, Exception):
                # any exception returned by a remote task aborts the whole map
                raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
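# Usage sketch for the 'remote_cluster' mode (not part of the original code).
# It assumes the ['parallel-cluster'] section of ~/.pyhrf/config.cfg is set up
# and that the mapped function lives in a module importable on the remote
# side; the names below are hypothetical placeholders.
#
# from mymodule import heavy_computation
# results = remote_map(heavy_computation,
#                      largs=[('subject01',), ('subject02',)],
#                      lkwargs=[{'smoothing': 5}, {'smoothing': 8}],
#                      mode='remote_cluster')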
    def test_result(self):
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            workflow = self.wf_examples.example_special_command()
            # Verify some things
            self.assertTrue(len(w) == 1)
            self.assertTrue(issubclass(w[-1].category, UserWarning))
            self.assertTrue("contains single quote. It could fail using DRMAA"
                            in str(w[-1].message))

        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        # TODO : sometimes raises an error
        # because status = "workflow_in_progress"

        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))

        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    # Test job stdout
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_local)
                        self.assertTrue(isSame, msg)
                    else:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_remote)
                        self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                    "job stderr not empty : cf %s" %
                                    job_stderr_file)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #40
0
    def test_result(self):
        nb = 20
        time_sleep = 1

        workflow = self.wf_examples.example_n_jobs(nb=nb, time=time_sleep)
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    # Test stdout
                    self.assertTrue(os.stat(job_stdout_file).st_size == 0,
                                    "job stdout not empty : file: %s, "
                                    "contents:\n%s" %
                                    (job_stdout_file,
                                     open(job_stdout_file).read()))
                    # Test no stderr
                    self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                    "job stderr not empty : file %s, "
                                    "contents:\n%s" %
                                    (job_stderr_file,
                                     open(job_stderr_file).read()))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
    def test_result(self):
        workflow = self.wf_examples.example_special_transfer()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow, name=self.__class__.__name__)

        # Transfer input files
        Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        status = self.wf_ctrl.workflow_status(self.wf_id)
        # Transfer output files
        Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE, "workflow status : %s. Expected : %s" % (status, constants.WORKFLOW_DONE)
        )
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0, "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl, include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" % (nb_failed_aborted_jobs, 0),
        )

        (jobs_info, transfers_info, workflow_status, workflow_queue, tmp_files) = self.wf_ctrl.workflow_elements_status(
            self.wf_id
        )

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_", suffix=repr(job_id), delete=False
                )
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_", suffix=repr(job_id), delete=False
                )
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id, job_stdout_file, job_stderr_file)
                    if job_name == "dir_contents":
                        # Test job standard out
                        with open(job_stdout_file, "r+") as f:
                            dir_contents = f.readlines()
                        dir_path_in = self.wf_examples.lo_in_dir
                        full_path_list = []
                        for element in os.listdir(dir_path_in):
                            full_path_list.append(os.path.join(dir_path_in, element))
                        dir_contents_model = list_contents(full_path_list, [])
                        self.assertTrue(sorted(dir_contents) == sorted(dir_contents_model))
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0, "job stderr not empty : cf %s" % job_stderr_file
                        )

                    if job_name == "multi file format test":
                        # Test job standard out
                        isSame, msg = identical_files(job_stdout_file, self.wf_examples.lo_mff_stdout)
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0, "job stderr not empty : cf %s" % job_stderr_file
                        )
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
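
The test above relies on two helpers from the soma-workflow test utilities that are not shown here: list_contents, which recursively collects a listing of the given paths, and identical_files, which compares two text files and returns a (bool, message) pair. The sketch below is purely illustrative of the latter, not the actual implementation:

# Hypothetical sketch of the identical_files helper used above; the real
# implementation lives in the soma-workflow test utilities.
def identical_files(path1, path2):
    # Compare two text files; return (True, None) if their contents match,
    # otherwise (False, an explanatory message).
    with open(path1) as f1, open(path2) as f2:
        if f1.read() == f2.read():
            return True, None
    return False, "Files %s and %s differ." % (path1, path2)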
2. Copy all the SPM batch files into the directory containing the
following Python script, then run

python create_workflow_spm12batches.py

This generates a file called 'spm12_batches.somawf'

3. Launch soma_workflow_gui on the command line in the same directory;
open the spm12_batches.somawf file and press 'submit'

Note: running on your own PC is fine if it has many cores.
"""

import glob
import os
import sys

from soma_workflow.client import Job, Workflow, Helper

# Use the batch files given on the command line, or every .mat file
# in the current directory if none were given.
if len(sys.argv) == 1:
    spm12_batches = glob.glob("*.mat")
else:
    spm12_batches = sys.argv[1:]

# One job per SPM12 batch, named after the batch file.
jobs = []
for b in spm12_batches:
    jobs.append(Job(command=["spm12", "run", os.path.abspath(b)], name=b))

workflow = Workflow(jobs)

# Write the workflow to disk so it can be opened in soma_workflow_gui.
Helper.serialize('spm12_batches.somawf', workflow)

print("Now, you can open 'spm12_batches.somawf' in soma_workflow_gui "
      "and submit it")
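
As an alternative to the GUI step, the workflow can also be submitted programmatically. The snippet below is only a sketch: it reuses the workflow object built above and assumes a default (local) soma-workflow configuration, using the same WorkflowController and Helper calls that appear in the test examples.

# Sketch only: programmatic submission, assuming a local soma-workflow
# configuration (adjust the WorkflowController arguments for a remote
# computing resource).
from soma_workflow.client import WorkflowController

controller = WorkflowController()
wf_id = controller.submit_workflow(workflow=workflow,
                                   name='spm12_batches')

# Block until the workflow has finished, then report any failed jobs.
Helper.wait_workflow(wf_id, controller)
print("workflow status:", controller.workflow_status(wf_id))
print("failed jobs:", Helper.list_failed_jobs(wf_id, controller))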
Example #43
0
    def test_result(self):
        cmd = [
            sys.executable, '-c',
            'from __future__ import print_function; import sys;'
            'from soma_workflow.test.workflow_tests.test_workflow_config import print_cmdline; '
            'print_cmdline(sys.argv[1:])', 'conf=%(configuration_dict)s'
        ]
        configuration = {'config1': 'value1', 'config2': 'value2'}
        print(cmd)
        job1 = Job(name='job1', command=cmd)
        job2 = Job(name='job2',
                   command=cmd,
                   configuration=configuration,
                   param_dict={})
        job3 = Job(name='job3',
                   command=cmd,
                   configuration=configuration,
                   use_input_params_file=True)
        expected_outputs = {
            'workflow': {
                'job1':
                'args: [\'conf=%(configuration_dict)s\']\n'
                'config:\n'
                'None',
                'job2':
                '''args: ['conf={"config1": "value1", "config2": "value2"}']
conf param
config:
{'config1': 'value1', 'config2': 'value2'}''',
                'job3':
                '''args: [\'conf=%(configuration_dict)s\']
with input file
params:
{'parameters': {}, 'configuration_dict': {'config1': 'value1', 'config2': 'value2'}}
config:
{'config1': 'value1', 'config2': 'value2'}''',
            },
        }
        workflow1 = Workflow(name='workflow', jobs=[job1, job2, job3])
        for workflow in (workflow1, ):
            self.wf_id = self.wf_ctrl.submit_workflow(
                workflow=workflow, name=self.__class__.__name__)
            # Transfer input files if file transfer
            if self.path_management == self.FILE_TRANSFER or \
                    self.path_management == self.SHARED_TRANSFER:
                Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
            # Wait for the workflow to finish
            Helper.wait_workflow(self.wf_id, self.wf_ctrl)
            status = self.wf_ctrl.workflow_status(self.wf_id)
            self.assertTrue(
                status == constants.WORKFLOW_DONE,
                "workflow status : %s. Expected : %s" %
                (status, constants.WORKFLOW_DONE))

            nb_failed_jobs = len(
                Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
            try:
                self.assertTrue(
                    nb_failed_jobs == 0,
                    "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
            except:  # noqa: E722
                print('jobs failed:', file=sys.stderr)
                print(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl),
                      file=sys.stderr)
                raise
            nb_failed_aborted_jobs = len(
                Helper.list_failed_jobs(self.wf_id,
                                        self.wf_ctrl,
                                        include_aborted_jobs=True))
            try:
                self.assertTrue(
                    nb_failed_aborted_jobs == 0,
                    "nb failed jobs including aborted : %i. Expected : %i" %
                    (nb_failed_aborted_jobs, 0))
            except:  # noqa: E722
                print('aborted jobs:', file=sys.stderr)
                print(Helper.list_failed_jobs(self.wf_id,
                                              self.wf_ctrl,
                                              include_aborted_jobs=True),
                      file=sys.stderr)
                raise

            (jobs_info, transfers_info, workflow_status, workflow_queue,
             tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

            for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                    in jobs_info:
                job_list = self.wf_ctrl.jobs([job_id])
                job_name, job_command, job_submission_date = job_list[job_id]

                self.tested_job = job_id

                if exit_info[0] == constants.FINISHED_REGULARLY:
                    # To check job standard out and standard err
                    job_stdout_file = tempfile.NamedTemporaryFile(
                        prefix="job_soma_out_log_",
                        suffix=repr(job_id),
                        delete=False)
                    job_stdout_file = job_stdout_file.name
                    job_stderr_file = tempfile.NamedTemporaryFile(
                        prefix="job_soma_outerr_log_",
                        suffix=repr(job_id),
                        delete=False)
                    job_stderr_file = job_stderr_file.name

                    try:
                        self.wf_ctrl.retrieve_job_stdouterr(
                            job_id, job_stdout_file, job_stderr_file)
                        output \
                            = open(job_stdout_file).read().strip().split('\n')
                        exp_wf_outputs = expected_outputs[workflow.name]
                        if job_name in exp_wf_outputs:
                            exp = exp_wf_outputs[job_name].split('\n')
                            #print('### job', job_name, 'output:', output, file=sys.stderr)
                            #print('### expected:', exp, file=sys.stderr)
                            #print('### res:', [o in output for o in exp], file=sys.stderr)
                            self.assertTrue(all([o in output for o in exp]))
                    finally:
                        os.unlink(job_stdout_file)
                        os.unlink(job_stderr_file)

            del self.tested_job