Beispiel #1
0
  def test_result(self):
    """Run the "special command" example workflow and check that it succeeds.

    The workflow built by ``WorkflowTest.wf_examples.special_command()`` is
    submitted through ``WorkflowTest.wf_ctrl``. In file-transfer mode the
    input files are transferred before waiting. Once
    ``Helper.wait_workflow`` returns, the workflow status must be
    ``constants.WORKFLOW_DONE`` and ``Helper.list_failed_jobs`` must return
    an empty list, both with and without ``include_aborted_jobs=True``.
    """
    workflow = WorkflowTest.wf_examples.special_command()
    self.wf_id = WorkflowTest.wf_ctrl.submit_workflow(
        workflow=workflow,
        name="unit test command check")
    if self.path_management == WorkflowTest.FILE_TRANSFER:
      Helper.transfer_input_files(self.wf_id, WorkflowTest.wf_ctrl)

    Helper.wait_workflow(self.wf_id, WorkflowTest.wf_ctrl)
    status = self.wf_ctrl.workflow_status(self.wf_id)

    # assertEqual replaces the deprecated ``assert_`` alias and shows both
    # operands in the failure message.
    self.assertEqual(status, constants.WORKFLOW_DONE)

    failed_jobs = Helper.list_failed_jobs(self.wf_id, WorkflowTest.wf_ctrl)
    self.assertEqual(len(failed_jobs), 0)

    failed_or_aborted_jobs = Helper.list_failed_jobs(
        self.wf_id,
        WorkflowTest.wf_ctrl,
        include_aborted_jobs=True)
    self.assertEqual(len(failed_or_aborted_jobs), 0)
Beispiel #2
0
    def _submit_graph(self, pyfiles, dependencies, nodes):
        """Turn per-node scripts into a soma-workflow and run it to completion.

        Parameters
        ----------
        pyfiles : sequence of str
            Script paths. One ``Job`` is created per script; it runs
            ``sys.executable <script>`` and is named after the script's file
            name without its extension.
        dependencies : dict
            Maps an index into ``pyfiles`` to an iterable of other indices.
            Each pair is handed to ``Workflow`` as ``(jobs[val], jobs[key])``
            -- presumably "``key`` runs after ``val``"; confirm against the
            soma-workflow dependency convention.
        nodes
            Not used by this method.

        The workflow is serialized to a file named ``'workflow'`` before it is
        submitted; the method returns once ``Helper.wait_workflow`` returns.
        """
        jobs = [
            Job(command=[sys.executable, fname],
                name=os.path.splitext(os.path.basename(fname))[0])
            for fname in pyfiles
        ]
        soma_deps = [
            (jobs[val], jobs[key])
            for key, values in dependencies.items()
            for val in values
        ]

        wf = Workflow(jobs, soma_deps)
        logger.info('serializing workflow')
        Helper.serialize('workflow', wf)
        controller = WorkflowController()
        logger.info('submitting workflow')
        wf_id = controller.submit_workflow(wf)
        Helper.wait_workflow(wf_id, controller)
Beispiel #3
0
    def _submit_graph(self, pyfiles, dependencies, nodes):
        """Submit one soma-workflow job per script and wait for the workflow.

        Parameters
        ----------
        pyfiles : sequence of str
            Paths of the scripts to execute with ``sys.executable``. Each job
            takes the script's base name (extension stripped) as its name.
        dependencies : dict
            ``{index: iterable_of_indices}`` over ``pyfiles``. Every entry
            yields the pairs ``(jobs[val], jobs[key])`` passed to
            ``Workflow`` -- NOTE(review): looks like ``key`` depends on
            ``val``; verify against soma-workflow.
        nodes
            Accepted but not used here.

        The workflow is written out with ``Helper.serialize('workflow', wf)``
        and then submitted through a default ``WorkflowController``.
        """
        jobs = []
        for fname in pyfiles:
            name = os.path.splitext(os.path.basename(fname))[0]
            jobs.append(Job(command=[sys.executable, fname], name=name))

        # No defensive copy of items() is needed: the dict is not mutated here.
        soma_deps = []
        for key, values in dependencies.items():
            soma_deps.extend((jobs[val], jobs[key]) for val in values)

        wf = Workflow(jobs, soma_deps)
        logger.info('serializing workflow')
        Helper.serialize('workflow', wf)
        controller = WorkflowController()
        logger.info('submitting workflow')
        wf_id = controller.submit_workflow(wf)
        Helper.wait_workflow(wf_id, controller)
Beispiel #4
0
    def run(self, **Xy):
        '''Run soma-workflow without gui

        The datasets and the tree are saved into a fresh temporary working
        directory, one ``epac_mapper`` job is created per keys file, and the
        resulting workflow is submitted and waited for. The tree is then
        reloaded from the working directory.

        The process working directory is changed to the temporary directory
        while the workflow runs. It is always restored afterwards, even if a
        step fails. The temporary directory is removed when
        ``self.remove_local_tree`` is set.

        Parameters
        ----------
        **Xy
            Named arrays, saved with ``np.savez`` as the dataset file of the
            working directory.

        Returns
        -------
        The tree root reloaded from the working directory store (also
        assigned to ``self.tree_root``).

        Raises
        ------
        NoSomaWFError
            If ``soma.workflow.client`` cannot be imported.

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="trasform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            # The message previously lacked a space between two fragments
            # and misspelled "workflow".
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-workflow "\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        try:
            os.chdir(tmp_work_dir_path)
            ft_working_directory = FileTransfer(is_input=True,
                                                client_path=tmp_work_dir_path,
                                                name="working directory")
            ## Save the database and tree to working directory
            ## ===============================================
            np.savez(os.path.join(tmp_work_dir_path,
                     SomaWorkflowEngine.dataset_relative_path), **Xy)
            store = StoreFs(dirpath=os.path.join(
                tmp_work_dir_path,
                SomaWorkflowEngine.tree_root_relative_path))
            self.tree_root.save_tree(store=store)

            ## Subtree job allocation on disk
            ## ==============================
            node_input = NodesInput(self.tree_root.get_key())
            split_node_input = SplitNodesInput(
                self.tree_root,
                num_processes=self.num_processes)
            nodesinput_list = split_node_input.split(node_input)
            keysfile_list = self._save_job_list(tmp_work_dir_path,
                                                nodesinput_list)
            ## Build soma-workflow
            ## ===================
            jobs = [Job(command=[u"epac_mapper",
                                 u'--datasets', '"%s"' %
                                 (SomaWorkflowEngine.dataset_relative_path),
                                 u'--keysfile', '"%s"' %
                                 (nodesfile)],
                        referenced_input_files=[ft_working_directory],
                        referenced_output_files=[ft_working_directory],
                        name="epac_job_key=%s" % (nodesfile),
                        working_directory=ft_working_directory)
                    for nodesfile in keysfile_list]
            soma_workflow = Workflow(jobs=jobs)
            # An empty string is falsy, so one test covers None and "".
            if not self.resource_id:
                self.resource_id = socket.gethostname()
            controller = WorkflowController(self.resource_id,
                                            self.login,
                                            self.pw)
            ## run soma-workflow
            ## =================
            wf_id = controller.submit_workflow(workflow=soma_workflow,
                                               name="epac workflow")
            Helper.transfer_input_files(wf_id, controller)
            Helper.wait_workflow(wf_id, controller)
            Helper.transfer_output_files(wf_id, controller)
            if self.remove_finished_wf:
                controller.delete_workflow(wf_id)
            ## read result tree
            ## ================
            self.tree_root = store.load()
        finally:
            # Restore the caller's cwd and clean up even when a step above
            # raised; otherwise the process stays inside the temp dir.
            os.chdir(cur_work_dir)
            if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
                shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
Beispiel #5
0
    def run(self, **Xy):
        '''Run soma-workflow without gui

        Saves the datasets and the tree into a new temporary working
        directory, builds one ``epac_mapper`` job per keys file, then submits
        the workflow and waits for it. The workflow is deleted from the
        controller afterwards and the tree is reloaded from the working
        directory.

        The process working directory is switched to the temporary directory
        for the duration of the run. It is always restored and the temporary
        directory is always removed, even if a step fails.

        Parameters
        ----------
        **Xy
            Named arrays, written with ``np.savez`` to the dataset file
            inside the working directory.

        Returns
        -------
        The tree root loaded back from the working directory store (also
        assigned to ``self.tree_root``).

        Raises
        ------
        NoSomaWFError
            If ``soma.workflow.client`` cannot be imported.

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="trasform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            # Fixed message: a space was missing between two fragments and
            # "workflow" was misspelled.
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-workflow "\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        try:
            os.chdir(tmp_work_dir_path)
            ft_working_directory = FileTransfer(is_input=True,
                                                client_path=tmp_work_dir_path,
                                                name="working directory")
            ## Save the database and tree to working directory
            ## ===============================================
            dataset_path = os.path.join(
                tmp_work_dir_path, SomaWorkflowEngine.dataset_relative_path)
            np.savez(dataset_path, **Xy)
            store = StoreFs(dirpath=os.path.join(
                tmp_work_dir_path,
                SomaWorkflowEngine.tree_root_relative_path))
            self.tree_root.save_tree(store=store)

            ## Subtree job allocation on disk
            ## ==============================
            node_input = NodesInput(self.tree_root.get_key())
            split_node_input = SplitNodesInput(
                self.tree_root, num_processes=self.num_processes)
            nodesinput_list = split_node_input.split(node_input)
            keysfile_list = self._save_job_list(tmp_work_dir_path,
                                                nodesinput_list)
            ## Build soma-workflow
            ## ===================
            jobs = [
                Job(command=[
                    u"epac_mapper", u'--datasets',
                    '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                    u'--keysfile',
                    '"%s"' % (nodesfile)
                ],
                    referenced_input_files=[ft_working_directory],
                    referenced_output_files=[ft_working_directory],
                    name="epac_job_key=%s" % (nodesfile),
                    working_directory=ft_working_directory)
                for nodesfile in keysfile_list
            ]
            soma_workflow = Workflow(jobs=jobs)
            # "" is falsy, so a single truthiness test is enough.
            if not self.resource_id:
                self.resource_id = socket.gethostname()
            controller = WorkflowController(self.resource_id, self.login,
                                            self.pw)
            ## run soma-workflow
            ## =================
            wf_id = controller.submit_workflow(workflow=soma_workflow,
                                               name="epac workflow")
            Helper.transfer_input_files(wf_id, controller)
            Helper.wait_workflow(wf_id, controller)
            Helper.transfer_output_files(wf_id, controller)
            controller.delete_workflow(wf_id)
            ## read result tree
            ## ================
            self.tree_root = store.load()
        finally:
            # Always undo the chdir and drop the scratch directory, so a
            # failing step neither strands the process in the temp dir nor
            # leaks it on disk.
            os.chdir(cur_work_dir)
            if os.path.isdir(tmp_work_dir_path):
                shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
from soma.workflow.client import Helper

# Example script: copy one file through a soma-workflow job using file
# transfers, then clean the workflow up.
# NOTE(review): FileTransfer, Job, Workflow and WorkflowController are not
# imported in the visible part of this snippet -- presumably they come from
# soma.workflow.client like Helper; confirm before running.

# Input side: a FileTransfer flagged is_input=True, pointing at the
# client-side path of the file to copy.
myfile = FileTransfer(is_input=True,
                    client_path="/tmp/soma_workflow_examples/myfile",
                    name="myfile")

# Output side: a FileTransfer flagged is_input=False, pointing at the
# client-side path where the copy is expected.
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# A single job running `cp <myfile> <copy_of_myfile>`; both transfers are
# given in the command and declared as referenced input/output files.
copy_job = Job(command=["cp", myfile, copy_of_myfile],
              name="copy",
              referenced_input_files=[myfile],
              referenced_output_files=[copy_of_myfile])

# The workflow holds only that job, so there are no dependencies.
workflow = Workflow(jobs=[copy_job],
                    dependencies=[])

# Submit the workflow.
# NOTE(review): the two arguments look like a resource id and a login that
# are hard-coded for one specific machine/user -- adjust before running
# elsewhere.
controller = WorkflowController("jinpeng-Latitude-E6530", "jinpeng")

wf_id = controller.submit_workflow(workflow=workflow,
                          name="simple transfer")

# Order matters: send the inputs, wait for the workflow, fetch the outputs,
# and only then delete the workflow from the controller.
Helper.transfer_input_files(wf_id, controller)
Helper.wait_workflow(wf_id, controller)
Helper.transfer_output_files(wf_id, controller)
controller.delete_workflow(wf_id)
Beispiel #7
0
  def test_result(self):
    """Run the "multiple simple" example workflow and check each job's output.

    The workflow is submitted through ``WorkflowTest.wf_ctrl``. In
    file-transfer mode, input files are transferred before waiting and
    output files after. For every job that has one of the known names and
    finished regularly, its stdout/stderr are retrieved into ``/tmp`` and
    compared, together with the files it produced, against the reference
    files held by ``WorkflowTest.wf_examples``. Which files are compared
    depends on ``self.path_management``.

    Finally the workflow must be ``WORKFLOW_DONE`` with 2 failed jobs, and 6
    jobs once aborted jobs are included.
    """
    workflow = WorkflowTest.wf_examples.multiple_simple_example()
    self.wf_id = WorkflowTest.wf_ctrl.submit_workflow(
        workflow=workflow,
        name="unit test multiple")
    if self.path_management == WorkflowTest.FILE_TRANSFER:
        Helper.transfer_input_files(self.wf_id, WorkflowTest.wf_ctrl)

    Helper.wait_workflow(self.wf_id, WorkflowTest.wf_ctrl)

    if self.path_management == WorkflowTest.FILE_TRANSFER:
        Helper.transfer_output_files(self.wf_id, WorkflowTest.wf_ctrl)

    status = self.wf_ctrl.workflow_status(self.wf_id)
    examples = WorkflowTest.wf_examples

    def assert_same(first_path, second_path):
        # identicalFiles returns a (flag, message) pair; the message is
        # passed on as the assertion failure text.
        is_same, msg = identicalFiles(first_path, second_path)
        self.assertTrue(is_same, msg)

    def fetch_logs(job_id):
        # Retrieve the job's stdout/stderr into per-job files under /tmp.
        stdout_path = "/tmp/job_soma_out_log_" + repr(job_id)
        stderr_path = "/tmp/job_soma_outerr_log_" + repr(job_id)
        self.wf_ctrl.retrieve_job_stdouterr(job_id, stdout_path, stderr_path)
        return stdout_path, stderr_path

    # Job names are matched through repr(), as before.
    # NOTE(review): a unicode name under Python 2 would repr as u'...' and
    # never match -- confirm this is intended.
    verified_jobs = ("'job1'", "'job1 with exception'", "'job2'",
                     "'job3'", "'job4'", "'job3 with exception'")

    # Only the per-job information is needed from the elements status.
    jobs_info = WorkflowTest.wf_ctrl.workflow_elements_status(self.wf_id)[0]

    # TODO: check the stdout and stderr
    for job_id, _status, _queue, exit_info, _dates in jobs_info:
        job_name, _command, _submission_date = \
            self.wf_ctrl.jobs([job_id])[job_id]
        name_repr = repr(job_name)
        if name_repr not in verified_jobs:
            continue
        if exit_info[0] != constants.FINISHED_REGULARLY:
            continue

        local_path = self.path_management == WorkflowTest.LOCAL_PATH
        file_transfer = self.path_management == WorkflowTest.FILE_TRANSFER
        stdout_path, stderr_path = fetch_logs(job_id)

        if name_repr == "'job1'":
            assert_same(stdout_path, examples.lo_stdout1)
            if local_path:
                assert_same(examples.lo_out_model_file11, examples.lo_file11)
                assert_same(examples.lo_out_model_file12, examples.lo_file12)
                assert_same(stderr_path, examples.lo_stderr1)
            if file_transfer:
                assert_same(examples.lo_out_model_file11,
                            examples.tr_file11.client_path)
                assert_same(examples.lo_out_model_file12,
                            examples.tr_file12.client_path)
                # The stderr comparison is disabled in file-transfer mode:
                # for an unknown reason it raises some errors, see
                # http://stackoverflow.com/questions/10496758/unexpected-end-of-file-and-error-importing-function-definition-error-running

        elif name_repr == "'job1 with exception'":
            assert_same(stdout_path, examples.lo_stdout1_exception_model)

        elif name_repr == "'job2'":
            if file_transfer:
                # This comparison used to be computed but never asserted;
                # its result was overwritten by the stdout check below.
                assert_same(examples.tr_file2.client_path,
                            examples.lo_out_model_file2)
            if local_path:
                assert_same(examples.lo_file2, examples.lo_out_model_file2)
                assert_same(stderr_path, examples.lo_stderr2)
            assert_same(stdout_path, examples.lo_stdout2)

        elif name_repr == "'job3'":
            assert_same(stdout_path, examples.lo_stdout3)
            if local_path:
                assert_same(stderr_path, examples.lo_stderr3)
                assert_same(examples.lo_file3, examples.lo_out_model_file3)
            if file_transfer:
                assert_same(examples.tr_file3.client_path,
                            examples.lo_out_model_file3)

        elif name_repr == "'job4'":
            assert_same(stdout_path, examples.lo_stdout4)
            if local_path:
                assert_same(examples.lo_file4, examples.lo_out_model_file4)
                assert_same(stderr_path, examples.lo_stderr4)
            if file_transfer:
                assert_same(examples.tr_file4.client_path,
                            examples.lo_out_model_file4)

        elif name_repr == "'job3 with exception'":
            # NOTE(review): compared against the *job1* exception model,
            # exactly as before -- confirm this reference file is intended.
            assert_same(stdout_path, examples.lo_stdout1_exception_model)

    self.assertEqual(status, constants.WORKFLOW_DONE)

    failed_jobs = Helper.list_failed_jobs(self.wf_id, WorkflowTest.wf_ctrl)
    self.assertEqual(len(failed_jobs), 2)

    failed_or_aborted_jobs = Helper.list_failed_jobs(
        self.wf_id,
        WorkflowTest.wf_ctrl,
        include_aborted_jobs=True)
    self.assertEqual(len(failed_or_aborted_jobs), 6)