Example #1
    def wait(self, subject_id=None, step_id=None):
        if subject_id is None and step_id is None:
            Helper.wait_workflow(
                self._workflow_id, self._workflow_controller)
        elif subject_id is not None:
            if step_id is None:
                raise NotImplementedError
            else:
                self._step_wait(subject_id, step_id)
        else:
            raise NotImplementedError
        # transfer back files, if any
        Helper.transfer_output_files(self._workflow_id,
                                     self._workflow_controller)
    def test_result(self):
        workflow = self.wf_examples.example_wrong_native_spec_pbs()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id,
                                        self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id,
                                         self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        if self.path_management == self.LOCAL_PATH:
            self.assertTrue(nb_failed_aborted_jobs == 0,
                            "nb failed jobs including aborted : %i. "
                            "Expected : %i" % (nb_failed_aborted_jobs, 0))
        else:
            self.assertTrue(nb_failed_aborted_jobs == 1,
                            "nb failed jobs including aborted : %i. "
                            "Expected : %i" % (nb_failed_aborted_jobs, 1))
    def test_result(self):
        workflow = self.wf_examples.example_wrong_native_spec_pbs()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        if self.path_management == self.LOCAL_PATH \
                or self.wf_ctrl.get_scheduler_type() != 'pbs':
            self.assertTrue(
                nb_failed_aborted_jobs == 0,
                "nb failed jobs including aborted : %i. "
                "Expected : %i" % (nb_failed_aborted_jobs, 0))
        else:
            self.assertTrue(
                nb_failed_aborted_jobs == 1,
                "nb failed jobs including aborted : %i. "
                "Expected : %i" % (nb_failed_aborted_jobs, 1))
Example #4
echo %s
""" % test_bash_script
    fileout.write(filecontent)
    fileout.close()
    os.chdir(cur_work_dir)

    job1 = Job(command=[u"touch", test_filepath],
               name="epac_job_test",
               working_directory=tmp_work_dir_path)
    job2 = Job(command=["%s/readfile" % cur_file_dir, test_bash_script],
               name="epac_job_test",
               working_directory=tmp_work_dir_path)

    soma_workflow = Workflow(jobs=[job1, job2])

    resource_id = socket.gethostname()
    controller = WorkflowController(resource_id, "", "")
    ## run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow")
    Helper.wait_workflow(wf_id, controller)
    nb_failed_jobs = len(Helper.list_failed_jobs(wf_id, controller))
    if nb_failed_jobs > 0:
        raise ValueError("Soma-workflow error, cannot use working directory")

    if not os.path.isfile(os.path.join(tmp_work_dir_path, test_filepath)):
        raise ValueError("Soma-workflow cannot define working directory")
    else:
        print "OK for creating new file in working directory"
Example #5
    def run(self, **Xy):
        '''Run soma-workflow without gui

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3,
        ...                                 remove_finished_wf=False)
        >>> tree_root_node = sfw_engine.run(**Xy)
        light mode
        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}])

        '''
        try:
            from soma_workflow.client import Job, Workflow
            from soma_workflow.client import Helper, FileTransfer
            from soma_workflow.client import WorkflowController
        except ImportError:
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-worklow"\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        is_run_local = False
        if not self.resource_id or self.resource_id == "":
            self.resource_id = socket.gethostname()
            is_run_local = True
        # print "is_run_local=", is_run_local
        if not is_run_local:
            ft_working_directory = FileTransfer(is_input=True,
                                                client_path=tmp_work_dir_path,
                                                name="working directory")
        else:
            ft_working_directory = tmp_work_dir_path

        ## Save the database and tree to working directory
        ## ===============================================
        # np.savez(os.path.join(tmp_work_dir_path,
        # SomaWorkflowEngine.dataset_relative_path), **Xy)
        save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)

        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list)

        ## Build soma-workflow
        ## ===================
        jobs = self._create_jobs(keysfile_list, is_run_local,
                                 ft_working_directory)
        soma_workflow = Workflow(jobs=jobs)

        controller = WorkflowController(self.resource_id, self.login, self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow",
                                           queue=self.queue)
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)

        self.engine_info = self.get_engine_info(controller, wf_id)

        if self.remove_finished_wf:
            controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
            shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
Example #6
    def test_result(self):
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            workflow = self.wf_examples.example_special_command()
            # Verify some things
            self.assertTrue(len(w) == 1)
            self.assertTrue(issubclass(w[-1].category, UserWarning))
            self.assertTrue("contains single quote. It could fail using DRMAA"
                            in str(w[-1].message))

        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        # TODO : sometimes raises an error
        # because status = "workflow_in_progress"

        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))

        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    # Test job stdout
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_local)
                        self.assertTrue(isSame, msg)
                    else:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_remote)
                        self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0,
                        "job stderr not empty : cf %s" % job_stderr_file)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #7
file1 = FileTransfer(is_input=True,
                     client_path="%s/create_file.py" %
                     Configuration.get_home_dir(),
                     name="script")

file2 = FileTransfer(is_input=True,
                     client_path="%s/output_file" %
                     Configuration.get_home_dir(),
                     name="file created on the server")

# Job and Workflow
run_script = Job(command=["python", file1, file2],
                 name="copy",
                 referenced_input_files=[file1],
                 referenced_output_files=[file2])

workflow = Workflow(jobs=[run_script], dependencies=[])

workflow_id = controller.submit_workflow(workflow=workflow,
                                         name="Simple transfer")

# You may use the gui or manually transfer the files:
manual = True
if manual:
    Helper.transfer_input_files(workflow_id, controller)
    Helper.wait_workflow(workflow_id, controller)
    Helper.transfer_output_files(workflow_id, controller)

# RemoteConnection.kill_remote_servers("Gabriel")
print("Finished !!!")
Example #8
    def test_result(self):
        cmd = [
            sys.executable, '-c',
            'from __future__ import print_function; import sys;'
            'from soma_workflow.test.workflow_tests.test_workflow_config import print_cmdline; '
            'print_cmdline(sys.argv[1:])', 'conf=%(configuration_dict)s'
        ]
        configuration = {'config1': 'value1', 'config2': 'value2'}
        print(cmd)
        job1 = Job(name='job1', command=cmd)
        job2 = Job(name='job2',
                   command=cmd,
                   configuration=configuration,
                   param_dict={})
        job3 = Job(name='job3',
                   command=cmd,
                   configuration=configuration,
                   use_input_params_file=True)
        expected_outputs = {
            'workflow': {
                'job1':
                'args: [\'conf=%(configuration_dict)s\']\n'
                'config:\n'
                'None',
                'job2':
                '''args: ['conf={"config1": "value1", "config2": "value2"}']
conf param
config:
{'config1': 'value1', 'config2': 'value2'}''',
                'job3':
                '''args: [\'conf=%(configuration_dict)s\']
with input file
params:
{'parameters': {}, 'configuration_dict': {'config1': 'value1', 'config2': 'value2'}}
config:
{'config1': 'value1', 'config2': 'value2'}''',
            },
        }
        workflow1 = Workflow(name='workflow', jobs=[job1, job2, job3])
        for workflow in (workflow1, ):
            self.wf_id = self.wf_ctrl.submit_workflow(
                workflow=workflow, name=self.__class__.__name__)
            # Transfer input files if file transfer
            if self.path_management == self.FILE_TRANSFER or \
                    self.path_management == self.SHARED_TRANSFER:
                Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
            # Wait for the workflow to finish
            Helper.wait_workflow(self.wf_id, self.wf_ctrl)
            status = self.wf_ctrl.workflow_status(self.wf_id)
            self.assertTrue(
                status == constants.WORKFLOW_DONE,
                "workflow status : %s. Expected : %s" %
                (status, constants.WORKFLOW_DONE))

            nb_failed_jobs = len(
                Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
            try:
                self.assertTrue(
                    nb_failed_jobs == 0,
                    "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
            except:  # noqa: E722
                print('jobs failed:', file=sys.stderr)
                print(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl),
                      file=sys.stderr)
                raise
            nb_failed_aborted_jobs = len(
                Helper.list_failed_jobs(self.wf_id,
                                        self.wf_ctrl,
                                        include_aborted_jobs=True))
            try:
                self.assertTrue(
                    nb_failed_aborted_jobs == 0,
                    "nb failed jobs including aborted : %i. Expected : %i" %
                    (nb_failed_aborted_jobs, 0))
            except:  # noqa: E722
                print('aborted jobs:', file=sys.stderr)
                print(Helper.list_failed_jobs(self.wf_id,
                                              self.wf_ctrl,
                                              include_aborted_jobs=True),
                      file=sys.stderr)
                raise

            (jobs_info, transfers_info, workflow_status, workflow_queue,
             tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

            for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                    in jobs_info:
                job_list = self.wf_ctrl.jobs([job_id])
                job_name, job_command, job_submission_date = job_list[job_id]

                self.tested_job = job_id

                if exit_info[0] == constants.FINISHED_REGULARLY:
                    # To check job standard out and standard err
                    job_stdout_file = tempfile.NamedTemporaryFile(
                        prefix="job_soma_out_log_",
                        suffix=repr(job_id),
                        delete=False)
                    job_stdout_file = job_stdout_file.name
                    job_stderr_file = tempfile.NamedTemporaryFile(
                        prefix="job_soma_outerr_log_",
                        suffix=repr(job_id),
                        delete=False)
                    job_stderr_file = job_stderr_file.name

                    try:
                        self.wf_ctrl.retrieve_job_stdouterr(
                            job_id, job_stdout_file, job_stderr_file)
                        output \
                            = open(job_stdout_file).read().strip().split('\n')
                        exp_wf_outputs = expected_outputs[workflow.name]
                        if job_name in exp_wf_outputs:
                            exp = exp_wf_outputs[job_name].split('\n')
                            #print('### job', job_name, 'output:', output, file=sys.stderr)
                            #print('### expected:', exp, file=sys.stderr)
                            #print('### res:', [o in output for o in exp], file=sys.stderr)
                            self.assertTrue(all([o in output for o in exp]))
                    finally:
                        os.unlink(job_stdout_file)
                        os.unlink(job_stderr_file)

            del self.tested_job
Example #9
    def test_result(self):
        workflow = self.wf_examples.example_simple_exception2()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__,
                                                  queue='Cati_run4')
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 1,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 1))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 2,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 2))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    if job_name == 'job1':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file, self.wf_examples.lo_stdout[1])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.lo_file[11])
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.lo_file[12])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.tr_file[11].client_path)
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.tr_file[12].client_path)
                            self.assertTrue(isSame, msg)

                    if job_name == 'job2':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file, self.wf_examples.lo_stdout[2])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[2],
                                self.wf_examples.lo_file[2])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[2],
                                self.wf_examples.tr_file[2].client_path)
                            self.assertTrue(isSame, msg)

                    if job_name == 'job3 with exception':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_exception_model)
                        self.assertTrue(isSame, msg)
                        # Test the last line of stderr
                        with open(job_stderr_file) as f:
                            lines = f.readlines()
                        expected_error = 'Exception: Paf Boum Boum Bada Boum !!!\n'
                        isSame = (lines[-1] == expected_error)
                        self.assertTrue(
                            isSame, "Job exception : %s. Expected : %s" %
                            (lines[-1], expected_error))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #10
    def run(self, **Xy):
        """Run soma-workflow without gui

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3,
        ...                                 remove_finished_wf=False)
        >>> tree_root_node = sfw_engine.run(**Xy)
        light mode
        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}])

        """
        try:
            from soma_workflow.client import Job, Workflow
            from soma_workflow.client import Helper, FileTransfer
            from soma_workflow.client import WorkflowController
        except ImportError:
            errmsg = (
                "No soma-workflow is found. "
                "Please verify your soma-workflow "
                "on your computer (e.g. PYTHONPATH)\n"
            )
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        is_run_local = False
        if not self.resource_id or self.resource_id == "":
            self.resource_id = socket.gethostname()
            is_run_local = True
        # print "is_run_local=", is_run_local
        if not is_run_local:
            ft_working_directory = FileTransfer(is_input=True, client_path=tmp_work_dir_path, name="working directory")
        else:
            ft_working_directory = tmp_work_dir_path

        ## Save the database and tree to working directory
        ## ===============================================
        # np.savez(os.path.join(tmp_work_dir_path,
        # SomaWorkflowEngine.dataset_relative_path), **Xy)
        save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
        store = StoreFs(dirpath=os.path.join(tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)

        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root, num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list)

        ## Build soma-workflow
        ## ===================
        jobs = self._create_jobs(keysfile_list, is_run_local, ft_working_directory)
        soma_workflow = Workflow(jobs=jobs)

        controller = WorkflowController(self.resource_id, self.login, self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow, name="epac workflow", queue=self.queue)
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)

        self.engine_info = self.get_engine_info(controller, wf_id)

        if self.remove_finished_wf:
            controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
            shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
Example #11
def run_soma_workflow(treatments, exec_cmd, tmp_local_dirs, server_id,
                      remote_host, remote_user, remote_pathes,
                      local_result_pathes, label_for_cluster,
                      wait_ending=False):
    """Dispatch treatments using soma-workflow.

    Parameters
    ----------
    treatments
        dict mapping a treatment name to a treatment object
    exec_cmd
        command to run on each ROI data
    tmp_local_dirs
        dict mapping a treatment name to a local tmp dir (used to store a
        temporary configuration file)
    server_id
        server ID as expected by WorkflowController
    remote_host
        remote machine where treatments are processed in parallel
    remote_user
        user name used to log in to remote_host
    remote_pathes
        dict mapping a treatment name to an existing remote dir which will
        be used to store ROI data and result files
    local_result_pathes
        dict mapping a treatment name to a local path where final results
        will be stored (the host sends them there by scp)
    label_for_cluster
        base name used to label workflows and sub jobs
    wait_ending
        if True, block until the workflow ends and load the results of the
        last treatment; otherwise return an empty list right after
        submission
    """

    import getpass
    from socket import gethostname

    local_user = getpass.getuser()
    local_host = gethostname()

    all_nodes = []
    all_deps = []
    all_groups = []
    split_jobs = []
    for t_id, treatment in treatments.iteritems():

        tmp_local_dir = tmp_local_dirs[t_id]
        remote_path = remote_pathes[t_id]
        local_result_path = local_result_pathes[t_id]

        sj, n, d, g = prepare_treatment_jobs(treatment, tmp_local_dir,
                                             local_result_path,
                                             local_user, local_host,
                                             remote_host,
                                             remote_user, remote_path,
                                             label_for_cluster + '-' + str(t_id))
        all_nodes.extend(n)
        all_deps.extend(d)
        all_groups.append(g)
        split_jobs.append(sj)

    # Jobs for data splitting should be done sequentially.
    # If they're done in parallel, they may flood the remote file system
    for isj in xrange(len(split_jobs)):
        if isj + 1 < len(split_jobs):
            all_deps.append((split_jobs[isj], split_jobs[isj + 1]))

    # # Be sure that all splitting jobs are done first:
    # # Is there a better way ?
    # for n in all_nodes:
    #     for sjob in split_jobs:
    #         all_deps.append((sjob,n))
    # Does not seem to work well -> maybe too many deps?

    workflow = Workflow(
        all_nodes + split_jobs, all_deps, root_group=all_groups)

    # f = open('/tmp/workflow.pck','w')
    # cPickle.dump(workflow, f)
    # f.close()

    logger.info('Open connection ...')
    connection = WorkflowController(server_id, remote_user)

    logger.info('Submit workflow ...')
    wf_id = connection.submit_workflow(workflow=workflow,
                                       # expiration_date="",
                                       # queue="run32",
                                       name=label_for_cluster + '-' +
                                       local_user)
    #wf = connection.workflow(wf_id)

    if wait_ending:  # wait for result
        logger.info('Wait for workflow to end and make outputs ...')
        Helper.wait_workflow(wf_id, connection)

        for t_id, local_result_path in local_result_pathes.iteritems():
            treatment = treatments[t_id]
            rfilename = treatment.result_dump_file
            if rfilename is None:
                rfilename = 'result.pck'
            local_result_file = op.join(local_result_path,
                                        op.basename(rfilename))

            if not op.exists(local_result_file):
                raise Exception('Local result does not exist "%s"'
                                % local_result_file)

        if treatment.analyser.outFile is not None:
            # return result only for last treatment ...
            print 'Load result from %s ...' % local_result_file
            if splitext(local_result_file)[1] == '.gz':
                import gzip
                fresult = gzip.open(local_result_file)
            else:
                fresult = open(local_result_file)
            results = cPickle.load(fresult)
            fresult.close()
            # print 'Make outputs ...'
            #treatment.output(results, dump=False)
            logger.info('Cleaning tmp dirs ...')
            for tmp_dir in tmp_local_dirs.itervalues():
                shutil.rmtree(tmp_dir)

            return results
    else:
        logger.info('Cleaning tmp dirs ...')
        for tmp_dir in tmp_local_dirs.itervalues():
            shutil.rmtree(tmp_dir)

        logger.info('Workflow sent, returning ...')
        return []
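A hedged call sketch for run_soma_workflow: every treatment name, host and path below is a made-up placeholder, and treatment_obj stands for a treatment object the caller already has.

# Hypothetical call: treatment names, hosts and paths are placeholders.
treatments = {'roi_1': treatment_obj}
tmp_local_dirs = {'roi_1': '/tmp/pyhrf_roi_1'}
remote_pathes = {'roi_1': '/remote/data/roi_1'}
local_result_pathes = {'roi_1': '/local/results/roi_1'}

results = run_soma_workflow(treatments, 'some_exec_cmd', tmp_local_dirs,
                            'cluster_server_id', 'cluster.example.org',
                            'remote_user', remote_pathes,
                            local_result_pathes,
                            label_for_cluster='hrf_analysis',
                            wait_ending=True)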
Example #12
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster: use somaworkflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2):
    ...     return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs)
                 for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc % (fmodule, fname),
                             params_fn, output_fn])
            # print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['pyhrf_exec_pyfunc', fmodule, fname,
                               param_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(
            workflow=workflow, name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            if isinstance(o, Exception):
                # note: the errno != 17 check below is unreachable, since
                # this raise always fires first
                raise RemoteException('Task %d failed' % i, o)
                if o.errno != 17:
                    raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
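Building on the docstring above, a hedged sketch of dispatching the same kind of map on a cluster. It assumes the ['parallel-cluster'] section of ~/.pyhrf/config.cfg defines the 'server_id' and 'user' keys read by the code, and that the mapped function lives in an importable module (my_module here is hypothetical):

from pyhrf.parallel import remote_map
# my_module is a hypothetical module providing foo(a, b=2); the function
# must be importable on the remote side, as the docstring requires.
from my_module import foo

results = remote_map(foo, [(2,), (3,)], [{'b': 5}, {'b': 7}],
                     mode='remote_cluster')   # -> [7, 10] when jobs finish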
Example #13
    def run_workflow(self, workflow, test_files=[], test_dyn_files={}):
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))

        failed_jobs = Helper.list_failed_jobs(self.wf_id, self.wf_ctrl)
        nb_failed_jobs = len(failed_jobs)
        if nb_failed_jobs != 0:
            self.print_jobs(failed_jobs, 'Failed jobs')

        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        failed_aborted_jobs = Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True)
        nb_failed_aborted_jobs = len(failed_aborted_jobs)
        if nb_failed_aborted_jobs != 0:
            self.print_jobs(failed_aborted_jobs, 'Aborted jobs')
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        dyn_out_params = {1: 'filePathOut2',
                          2: 'filePathOut'}
        dyn_out_params = {}

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    if job_name.startswith('job1'):
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[1])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        with open(job_stderr_file) as f:
                            msg = "job stderr not empty : cf %s\n" \
                                "stderr:\n---\n%s\n---" \
                                % (job_stderr_file, f.read())
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        msg)

                    if job_name in test_dyn_files:
                        out_params = self.wf_ctrl.get_job_output_params(job_id)
                        dyn_out_params[job_name] = out_params

                    # For unknown reason, it raises some errors
                    # http://stackoverflow.com/questions/10496758/unexpected-end-of-file-and-error-importing-function-definition-error-running
                    # isSame,	msg	= identical_files(job_stderr_file,self.wf_examples.lo_stderr[1])
                    # self.failUnless(isSame == True)

                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        for out_file_num in test_files:
            # Test output files
            if self.path_management == self.LOCAL_PATH:
                out_file = self.wf_examples.lo_file[out_file_num]
            elif self.path_management == self.FILE_TRANSFER or \
                    self.path_management == self.SHARED_TRANSFER:
                out_file = self.wf_examples.tr_file[out_file_num].client_path

            isSame, msg = identical_files(
                self.wf_examples.lo_out_model_file[out_file_num], out_file)
            self.assertTrue(isSame, msg)

        for job_name, ref_out_params in six.iteritems(test_dyn_files):
            out_params = dyn_out_params[job_name]
            for param, file_num in six.iteritems(ref_out_params):
                isSame, msg = identical_files(
                    self.wf_examples.lo_out_model_file[file_num],
                    out_params[param])
                self.assertTrue(isSame, msg)


        del self.tested_job
Example #14
    def test_result(self):
        workflow = self.wf_examples.example_simple_exception1()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id,
                                                     self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 1,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 1))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 4,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 4))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name
                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)

                    if job_name == 'job1 with exception':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_exception_model)
                        self.assertTrue(isSame, msg)
                        # Test the last line of stderr
                        with open(job_stderr_file) as f:
                            lines = f.readlines()
                        expected_error = 'Exception: Paf Boum Boum Bada Boum !!!\n'
                        isSame = (lines[-1] == expected_error)
                        self.assertTrue(isSame,
                                        "Job exception : %s. Expected : %s" %
                                        (lines[-1], expected_error))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Example #15
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster: use somaworkflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2):
    ...     return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs) for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call([
                'python', '-c', cfunc % (fmodule, fname), params_fn, output_fn
            ])
            # print 'Read outputs ...'
            with open(output_fn, 'rb') as fout:
                results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'pyhrf_exec_pyfunc', fmodule, fname, param_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout, 'rb')
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            if isinstance(o, Exception):
                # a failed task pickles its exception as its output
                raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
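
A minimal usage sketch of remote_map under the two local modes. The worker
function build_sum below is hypothetical; defining it at module level of an
importable module is only strictly required for mode='remote_cluster'.

from pyhrf.parallel import remote_map

def build_sum(a, b=0):
    # hypothetical worker; any importable function with this shape works
    return a + b

largs = [(1,), (2,), (3,)]                   # one tuple of positional args per task
lkwargs = [{'b': 10}, {'b': 20}, {'b': 30}]  # one dict of keyword args per task

# Single-threaded loop on the local machine:
print remote_map(build_sum, largs, lkwargs, mode='serial')   # [11, 22, 33]

# joblib-based parallelism, sized by ['parallel-local']['nb_procs']
# in ~/.pyhrf/config.cfg:
print remote_map(build_sum, largs, lkwargs, mode='local')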
Ejemplo n.º 16
0
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):

    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':

        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(
                ['python', '-c', cfunc_marshal, params_fn, func_fn, output_fn])
            print 'Read outputs ...'
            with open(output_fn, 'rb') as fout:
                results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'python', '-c', cfunc, param_file, func_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])

        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
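
remote_map_marshal ships the function itself through dump_func (based on the
marshal module) instead of requiring it to be importable on the remote side,
which is what distinguishes it from remote_map. dump_func and cfunc_marshal
are defined elsewhere in the module; the sketch below is only a plausible
illustration of that mechanism, not the actual pyhrf implementation.

import marshal
import types

def dump_func_sketch(func, path):
    # Serialize only the code object; closures, decorators and default
    # argument values are not preserved by this approach.
    with open(path, 'wb') as f:
        marshal.dump(func.__code__, f)

def load_func_sketch(path, name='remote_func'):
    # Rebuild a callable on the worker side from the marshalled code object.
    with open(path, 'rb') as f:
        code = marshal.load(f)
    return types.FunctionType(code, globals(), name)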
Ejemplo n.º 17
0
def run_soma_workflow(treatments,
                      exec_cmd,
                      tmp_local_dirs,
                      server_id,
                      remote_host,
                      remote_user,
                      remote_pathes,
                      local_result_pathes,
                      label_for_cluster,
                      wait_ending=False):
    """Dispatch treatments using soma-workflow.

    Parameters
    ----------
    treatments
        dict mapping a treatment name to a treatment object
    exec_cmd
        command to run on each ROI data set
    tmp_local_dirs
        dict mapping a treatment name to a local tmp dir (used to store a
        temporary configuration file)
    server_id
        server ID as expected by WorkflowController
    remote_host
        remote machine where treatments are processed in parallel
    remote_user
        user name used to log in to remote_host
    remote_pathes
        dict mapping a treatment name to an existing remote dir which will be
        used to store ROI data and result files
    local_result_pathes
        dict mapping a treatment name to a local path where final results
        will be stored (the host sends them there by scp)
    label_for_cluster
        base name used to label workflows and sub jobs
    wait_ending
        if True, wait for the workflow to finish and load the results of the
        last treatment; otherwise return immediately after submission
    """

    import getpass
    from socket import gethostname

    local_user = getpass.getuser()
    local_host = gethostname()

    all_nodes = []
    all_deps = []
    all_groups = []
    split_jobs = []
    for t_id, treatment in treatments.iteritems():

        tmp_local_dir = tmp_local_dirs[t_id]
        remote_path = remote_pathes[t_id]
        local_result_path = local_result_pathes[t_id]

        sj, n, d, g = prepare_treatment_jobs(
            treatment, tmp_local_dir, local_result_path, local_user,
            local_host, remote_host, remote_user, remote_path,
            label_for_cluster + '-' + str(t_id))
        all_nodes.extend(n)
        all_deps.extend(d)
        all_groups.append(g)
        split_jobs.append(sj)

    # Jobs for data splitting should be done sequentially.
    # If they're done in parallel, they may flood the remote file system
    for isj in xrange(len(split_jobs)):
        if isj + 1 < len(split_jobs):
            all_deps.append((split_jobs[isj], split_jobs[isj + 1]))

    # # Be sure that all splitting jobs are done first:
    # # Is there a better way ?
    # for n in all_nodes:
    #     for sjob in split_jobs:
    #         all_deps.append((sjob,n))
    # Does not seem to work well -> maybe too many deps?

    workflow = Workflow(all_nodes + split_jobs,
                        all_deps,
                        root_group=all_groups)

    # f = open('/tmp/workflow.pck','w')
    # cPickle.dump(workflow, f)
    # f.close()

    logger.info('Open connection ...')
    connection = WorkflowController(server_id, remote_user)

    logger.info('Submit workflow ...')
    wf_id = connection.submit_workflow(
        workflow=workflow,
        # expiration_date="",
        # queue="run32",
        name=label_for_cluster + '-' + local_user)
    #wf = connection.workflow(wf_id)

    if wait_ending:  # wait for result
        logger.info('Wait for workflow to end and make outputs ...')
        Helper.wait_workflow(wf_id, connection)

        for t_id, local_result_path in local_result_pathes.iteritems():
            treatment = treatments[t_id]
            rfilename = treatment.result_dump_file
            if rfilename is None:
                rfilename = 'result.pck'
            local_result_file = op.join(local_result_path,
                                        op.basename(rfilename))

            if not op.exists(local_result_file):
                raise Exception('Local result does not exist "%s"' %
                                local_result_file)

        if treatment.analyser.outFile is not None:
            # return result only for last treatment ...
            print 'Load result from %s ...' % local_result_file
            if splitext(local_result_file)[1] == '.gz':
                import gzip
                fresult = gzip.open(local_result_file)
            else:
                fresult = open(local_result_file)
            results = cPickle.load(fresult)
            fresult.close()
            # print 'Make outputs ...'
            #treatment.output(results, dump=False)
            logger.info('Cleaning tmp dirs ...')
            for tmp_dir in tmp_local_dirs.itervalues():
                shutil.rmtree(tmp_dir)

            return results
    else:
        logger.info('Cleaning tmp dirs ...')
        for tmp_dir in tmp_local_dirs.itervalues():
            shutil.rmtree(tmp_dir)

        logger.info('Workflow sent, returning ...')
        return []
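
The split-job chaining above (one (job_i, job_i+1) dependency per consecutive
pair) is what forces soma-workflow to run the splitting jobs one at a time.
A self-contained sketch of that pattern, with illustrative 'sleep' commands
and assuming a configured soma-workflow resource:

from soma_workflow.client import Job, Workflow, WorkflowController, Helper

split_jobs = [Job(command=['sleep', '1'], name='split %d' % i)
              for i in range(3)]
# Chain each split job to the next one so that only one runs at a time.
deps = [(split_jobs[i], split_jobs[i + 1])
        for i in range(len(split_jobs) - 1)]

workflow = Workflow(jobs=split_jobs, dependencies=deps)
# Connect to the default/local resource; pass a server id and login to use a
# remote computing resource instead.
controller = WorkflowController()
wf_id = controller.submit_workflow(workflow=workflow, name='chained splits')
Helper.wait_workflow(wf_id, controller)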
Ejemplo n.º 18
0
    def test_result(self):
        workflow = self.wf_examples.example_multiple()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)

        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)

        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)

        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 2,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 2))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 6,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 6))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    if job_name == 'job1':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file, self.wf_examples.lo_stdout[1])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.lo_file[11])
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.lo_file[12])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.tr_file[11].client_path)
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.tr_file[12].client_path)
                            self.assertTrue(isSame, msg)
                            # For unknown reason, it raises some errors
                            # http://stackoverflow.com/questions/10496758/unexpected-end-of-file-and-error-importing-function-definition-error-running
                            # isSame, msg = identical_files(
                            #     job_stderr_file, self.wf_examples.lo_stderr[1])
                            # self.failUnless(isSame == True)

                    if job_name in ['job2', 'job3', 'job4']:
                        job_nb = int(job_name[3])
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[job_nb])

                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0,
                            "job stderr not empty : cf %s" % job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.lo_file[job_nb])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.tr_file[job_nb].client_path)
                            self.assertTrue(isSame, msg)

                    if job_name in [
                            'job1 with exception', 'job3 with exception'
                    ]:
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_exception_model)
                        self.assertTrue(isSame, msg)
                        # Test stderr
                        with open(job_stderr_file) as f:
                            lines = f.readlines()
                        print(lines)
                        isSame = (lines[-1] == 'Exception: Paf Boum '
                                  'Boum Bada Boum !!!\n')
                        self.assertTrue(isSame,
                                        "Job Exception: %s" % lines[-1])
                finally:
                    if os.path.exists(job_stdout_file):
                        os.unlink(job_stdout_file)
                    if os.path.exists(job_stderr_file):
                        os.unlink(job_stderr_file)
        del self.tested_job
Ejemplo n.º 19
0
    def test_result(self):
        if hasattr(self.wf_ctrl.scheduler_config, 'get_proc_nb'):
            n_iter = 10 * self.wf_ctrl.scheduler_config.get_proc_nb()
        else:
            n_iter = 100
        workflow = self.wf_examples.example_fake_pipelineT1(n_iter)
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id,
                                        self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id,
                                         self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE)
        self.assertTrue(len(Helper.list_failed_jobs(
                        self.wf_id,
                        self.wf_ctrl)) == 0)
        self.assertTrue(len(Helper.list_failed_jobs(
                        self.wf_id,
                        self.wf_ctrl,
                        include_aborted_jobs=True)) == 0)

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    # Test stdout
                    self.assertTrue(os.stat(job_stdout_file).st_size == 0,
                                    "job stdout not empty : cf %s" %
                                    job_stdout_file)
                    # Test no stderr
                    self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                    "job stderr not empty : cf %s" %
                                    job_stderr_file)
                finally:
                    if os.path.exists(job_stdout_file):
                        os.unlink(job_stdout_file)
                    if os.path.exists(job_stderr_file):
                        os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 20
0
    def test_result(self):
        workflow = self.wf_examples.example_native_spec_pbs()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow, name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE, "workflow status : %s. Expected : %s" % (status, constants.WORKFLOW_DONE)
        )
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0, "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl, include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" % (nb_failed_aborted_jobs, 0),
        )

        (jobs_info, transfers_info, workflow_status, workflow_queue, tmp_files) = self.wf_ctrl.workflow_elements_status(
            self.wf_id
        )

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_", suffix=repr(job_id), delete=False
                )
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_", suffix=repr(job_id), delete=False
                )
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id, job_stdout_file, job_stderr_file)
                    # Test stdout
                    isSame, msg = identical_files(job_stdout_file, self.wf_examples.lo_stdout[1])
                    self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0, "job stderr not empty : cf %s" % job_stderr_file
                    )
                    # Test output files
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[11], self.wf_examples.lo_file[11]
                        )
                        self.assertTrue(isSame, msg)
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[12], self.wf_examples.lo_file[12]
                        )
                        self.assertTrue(isSame, msg)
                    if self.path_management == self.FILE_TRANSFER or self.path_management == self.SHARED_TRANSFER:
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[11], self.wf_examples.tr_file[11].client_path
                        )
                        self.assertTrue(isSame, msg)
                        isSame, msg = identical_files(
                            self.wf_examples.lo_out_model_file[12], self.wf_examples.tr_file[12].client_path
                        )
                        self.assertTrue(isSame, msg)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 21
0
    def test_result(self):
        if hasattr(self.wf_ctrl.scheduler_config, 'get_proc_nb'):
            n_iter = 5 * self.wf_ctrl.scheduler_config.get_proc_nb()
        else:
            n_iter = 5
        workflow = self.wf_examples.example_fake_pipelineT1(n_iter)
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE)
        self.assertTrue(
            len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl)) == 0)
        self.assertTrue(
            len(
                Helper.list_failed_jobs(
                    self.wf_id, self.wf_ctrl, include_aborted_jobs=True)) == 0)

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    # Test stdout
                    self.assertTrue(
                        os.stat(job_stdout_file).st_size == 0,
                        "job stdout not empty : cf %s" % job_stdout_file)
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0,
                        "job stderr not empty : cf %s" % job_stderr_file)
                finally:
                    if os.path.exists(job_stdout_file):
                        os.unlink(job_stdout_file)
                    if os.path.exists(job_stderr_file):
                        os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 22
0
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):

    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':

        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc_marshal, params_fn,
                             func_fn, output_fn])
            print 'Read outputs ...'
            with open(output_fn, 'rb') as fout:
                results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['python', '-c', cfunc, param_file, func_file,
                               output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])

        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(
            workflow=workflow, name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
Ejemplo n.º 23
0
    def test_result(self):
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            # Trigger a warning.
            workflow = self.wf_examples.example_special_command()
            # Verify some things
            self.assertTrue(len(w) == 1)
            self.assertTrue(issubclass(w[-1].category, UserWarning))
            self.assertTrue("contains single quote. It could fail using DRMAA"
                            in str(w[-1].message))

        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        # TODO : sometimes raises an error
        # because status = "workflow_in_progress"

        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))

        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    # Test job stdout
                    if self.path_management == self.LOCAL_PATH:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_local)
                        self.assertTrue(isSame, msg)
                    else:
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout_command_remote)
                        self.assertTrue(isSame, msg)
                    # Test no stderr
                    self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                    "job stderr not empty : cf %s" %
                                    job_stderr_file)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 24
0
    def test_result(self):
        workflow = self.wf_examples.example_simple()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))

        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id,
                                                     self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    if job_name == 'job1':
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[1])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        msg = "job stderr not empty : cf %s\n" \
                            "stderr:\n---\n%s\n---" \
                            % (job_stderr_file, open(job_stderr_file).read())
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        msg)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.lo_file[11])
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.lo_file[12])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[11],
                                self.wf_examples.tr_file[11].client_path)
                            self.assertTrue(isSame, msg)
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[12],
                                self.wf_examples.tr_file[12].client_path)
                            self.assertTrue(isSame, msg)
                            # For unknown reason, it raises some errors
                            # http://stackoverflow.com/questions/10496758/unexpected-end-of-file-and-error-importing-function-definition-error-running
                            # isSame, msg = identical_files(
                            #     job_stderr_file, self.wf_examples.lo_stderr[1])
                            # self.failUnless(isSame == True)

                    if job_name in ['job2', 'job3', 'job4']:
                        job_nb = int(job_name[3])
                        # Test stdout
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_stdout[job_nb])
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        "job stderr not empty : cf %s" %
                                        job_stderr_file)
                        # Test output files
                        if self.path_management == self.LOCAL_PATH:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.lo_file[job_nb])
                            self.assertTrue(isSame, msg)
                        if self.path_management == self.FILE_TRANSFER or \
                                self.path_management == self.SHARED_TRANSFER:
                            isSame, msg = identical_files(
                                self.wf_examples.lo_out_model_file[job_nb],
                                self.wf_examples.tr_file[job_nb].client_path)
                            self.assertTrue(isSame, msg)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
    def test_result(self):
        workflow = self.wf_examples.example_special_transfer()
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)

        # Transfer input files
        Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        status = self.wf_ctrl.workflow_status(self.wf_id)
        # Transfer output files
        Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    if job_name == 'dir_contents':
                        # Test job standard out
                        with open(job_stdout_file, 'r+') as f:
                            dir_contents = f.readlines()
                        dir_path_in = self.wf_examples.lo_in_dir
                        full_path_list = []
                        for element in os.listdir(dir_path_in):
                            full_path_list.append(os.path.join(dir_path_in,
                                                               element))
                        dir_contents_model = list_contents(full_path_list, [])
                        self.assertTrue(
                            sorted(dir_contents) == sorted(dir_contents_model))
                        # Test no stderr
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        "job stderr not empty : cf %s" %
                                        job_stderr_file)

                    if job_name == 'multi file format test':
                        # Test job standard out
                        isSame, msg = identical_files(
                            job_stdout_file,
                            self.wf_examples.lo_mff_stdout)
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                        "job stderr not empty : cf %s" %
                                        job_stderr_file)
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 26
0
    def test_result(self):
        nb = 20
        time_sleep = 1

        workflow = self.wf_examples.example_n_jobs(nb=nb, time=time_sleep)
        self.wf_id = self.wf_ctrl.submit_workflow(
            workflow=workflow,
            name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(status == constants.WORKFLOW_DONE,
                        "workflow status : %s. Expected : %s" %
                        (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0,
                        "nb failed jobs : %i. Expected : %i" %
                        (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(
            self.wf_id,
            self.wf_ctrl,
            include_aborted_jobs=True))
        self.assertTrue(nb_failed_aborted_jobs == 0,
                        "nb failed jobs including aborted : %i. Expected : %i"
                        % (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
            tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id,
                                                        job_stdout_file,
                                                        job_stderr_file)
                    # Test stdout
                    self.assertTrue(os.stat(job_stdout_file).st_size == 0,
                                    "job stdout not empty : file: %s, "
                                    "contents:\n%s" %
                                    (job_stdout_file,
                                     open(job_stdout_file).read()))
                    # Test no stderr
                    self.assertTrue(os.stat(job_stderr_file).st_size == 0,
                                    "job stderr not empty : file %s, "
                                    "contents:\n%s" %
                                    (job_stderr_file,
                                     open(job_stderr_file).read()))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 27
0
    def test_result(self):
        workflow = self.wf_examples.example_special_transfer()
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow, name=self.__class__.__name__)

        # Transfer input files
        Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        status = self.wf_ctrl.workflow_status(self.wf_id)
        # Transfer output files
        Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE, "workflow status : %s. Expected : %s" % (status, constants.WORKFLOW_DONE)
        )
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(nb_failed_jobs == 0, "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl, include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" % (nb_failed_aborted_jobs, 0),
        )

        (jobs_info, transfers_info, workflow_status, workflow_queue, tmp_files) = self.wf_ctrl.workflow_elements_status(
            self.wf_id
        )

        for (job_id, tmp_status, queue, exit_info, dates) in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_", suffix=repr(job_id), delete=False
                )
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_", suffix=repr(job_id), delete=False
                )
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(job_id, job_stdout_file, job_stderr_file)
                    if job_name == "dir_contents":
                        # Test job standard out
                        with open(job_stdout_file, "r+") as f:
                            dir_contents = f.readlines()
                        dir_path_in = self.wf_examples.lo_in_dir
                        full_path_list = []
                        for element in os.listdir(dir_path_in):
                            full_path_list.append(os.path.join(dir_path_in, element))
                        dir_contents_model = list_contents(full_path_list, [])
                        self.assertTrue(sorted(dir_contents) == sorted(dir_contents_model))
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0, "job stderr not empty : cf %s" % job_stderr_file
                        )

                    if job_name == "multi file format test":
                        # Test job standard out
                        isSame, msg = identical_files(job_stdout_file, self.wf_examples.lo_mff_stdout)
                        self.assertTrue(isSame, msg)
                        # Test no stderr
                        self.assertTrue(
                            os.stat(job_stderr_file).st_size == 0, "job stderr not empty : cf %s" % job_stderr_file
                        )
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
Ejemplo n.º 28
0
    def test_result(self):
        nb = 20
        time_sleep = 1

        workflow = self.wf_examples.example_n_jobs(nb=nb, time=time_sleep)
        self.wf_id = self.wf_ctrl.submit_workflow(workflow=workflow,
                                                  name=self.__class__.__name__)
        # Transfer input files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_input_files(self.wf_id, self.wf_ctrl)
        # Wait for the workflow to finish
        Helper.wait_workflow(self.wf_id, self.wf_ctrl)
        # Transfer output files if file transfer
        if self.path_management == self.FILE_TRANSFER or \
                self.path_management == self.SHARED_TRANSFER:
            Helper.transfer_output_files(self.wf_id, self.wf_ctrl)

        status = self.wf_ctrl.workflow_status(self.wf_id)
        self.assertTrue(
            status == constants.WORKFLOW_DONE,
            "workflow status : %s. Expected : %s" %
            (status, constants.WORKFLOW_DONE))
        nb_failed_jobs = len(Helper.list_failed_jobs(self.wf_id, self.wf_ctrl))
        self.assertTrue(
            nb_failed_jobs == 0,
            "nb failed jobs : %i. Expected : %i" % (nb_failed_jobs, 0))
        nb_failed_aborted_jobs = len(
            Helper.list_failed_jobs(self.wf_id,
                                    self.wf_ctrl,
                                    include_aborted_jobs=True))
        self.assertTrue(
            nb_failed_aborted_jobs == 0,
            "nb failed jobs including aborted : %i. Expected : %i" %
            (nb_failed_aborted_jobs, 0))

        (jobs_info, transfers_info, workflow_status, workflow_queue,
         tmp_files) = self.wf_ctrl.workflow_elements_status(self.wf_id)

        for (job_id, tmp_status, queue, exit_info, dates, drmaa_id) \
                in jobs_info:
            job_list = self.wf_ctrl.jobs([job_id])
            job_name, job_command, job_submission_date = job_list[job_id]

            self.tested_job = job_id

            if exit_info[0] == constants.FINISHED_REGULARLY:
                # To check job standard out and standard err
                job_stdout_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_out_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stdout_file = job_stdout_file.name
                job_stderr_file = tempfile.NamedTemporaryFile(
                    prefix="job_soma_outerr_log_",
                    suffix=repr(job_id),
                    delete=False)
                job_stderr_file = job_stderr_file.name

                try:
                    self.wf_ctrl.retrieve_job_stdouterr(
                        job_id, job_stdout_file, job_stderr_file)
                    # Test stdout
                    self.assertTrue(
                        os.stat(job_stdout_file).st_size == 0,
                        "job stdout not empty : file: %s, "
                        "contents:\n%s" %
                        (job_stdout_file, open(job_stdout_file).read()))
                    # Test no stderr
                    self.assertTrue(
                        os.stat(job_stderr_file).st_size == 0,
                        "job stderr not empty : file %s, "
                        "contents:\n%s" %
                        (job_stderr_file, open(job_stderr_file).read()))
                finally:
                    os.unlink(job_stdout_file)
                    os.unlink(job_stderr_file)

        del self.tested_job
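
The workflow submitted by this test comes from a generator, example_n_jobs(nb, time), which is not shown in the snippet. Below is a minimal sketch of how such a workflow of independent jobs could be built with the soma_workflow.client API; the job command is an assumption, chosen so that stdout and stderr stay empty as the assertions require.

from soma_workflow.client import Job, Workflow


def example_n_jobs(nb=20, time=1):
    # nb independent jobs, each sleeping `time` seconds; with no
    # dependencies they can all run in parallel on the resource.
    jobs = [Job(command=["sleep", str(time)], name="job_%d" % i)
            for i in range(nb)]
    return Workflow(jobs=jobs, dependencies=[])

Because sleep writes nothing to stdout or stderr and exits with status 0, every job should end in FINISHED_REGULARLY with empty log files, which is exactly what the loop above checks.
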
Example No. 29
            "--output_r2",
            os.path.join(args.output, 'r2'), "--output_distribution",
            os.path.join(args.output, 'distribution'), "--x", args.x, "--y",
            args.y, "--shuffling", shuffling, "--n_permutations",
            args.nb_permutations, "--alpha_percentile", args.alpha_percentile
        ],
                  name="job {} - alpha {}".format(run, alpha),
                  working_directory=scripts_path)
        group_significativity.append(job)
        jobs.append(job)

    distribution_voxels = Group(elements=group_significativity,
                                name="Voxel wise fitting of the models")

    workflow2 = Workflow(jobs=jobs, root_group=[distribution_voxels])

    ### Submit the workflow to the computing resource (configured in client-server mode)

    controller2 = WorkflowController("DSV_cluster_ap259944", args.login,
                                     args.password)

    workflow_id2 = controller2.submit_workflow(workflow=workflow2,
                                               name="Voxel-wise computations")

    # You may use the GUI or transfer the files manually:
    manual = True
    if manual:
        Helper.transfer_input_files(workflow_id2, controller2)
        Helper.wait_workflow(workflow_id2, controller2)
        Helper.transfer_output_files(workflow_id2, controller2)
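
Helper.wait_workflow blocks until the whole workflow has finished. If the client needs to stay responsive, the controller can also be polled directly; a minimal sketch, assuming the controller2 and workflow_id2 variables created above and the usual status constants from soma_workflow.constants:

import time

from soma_workflow import constants
from soma_workflow.client import Helper

# Poll the server every 10 seconds instead of blocking in Helper.wait_workflow.
while controller2.workflow_status(workflow_id2) != constants.WORKFLOW_DONE:
    time.sleep(10)

# WORKFLOW_DONE only means no more jobs will run; check for failures too,
# as the tests above do with Helper.list_failed_jobs.
failed_jobs = Helper.list_failed_jobs(workflow_id2, controller2)
if not failed_jobs:
    Helper.transfer_output_files(workflow_id2, controller2)
else:
    print("%d job(s) failed" % len(failed_jobs))
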