Example #1
0
    def _submit_graph(self, pyfiles, dependencies, nodes):
        """Build a soma-workflow Workflow from per-node scripts and submit it.

        Parameters
        ----------
        pyfiles : list of str
            Paths to generated Python scripts, one per pipeline node; each
            becomes a Job executed with the current interpreter.
        dependencies : dict
            Maps a job index to the indices of the jobs it depends on.
        nodes : list
            Pipeline nodes (unused here; kept for interface compatibility).
        """
        jobs = []
        soma_deps = []
        for fname in pyfiles:
            # Job name is the script file name without its extension.
            name = os.path.splitext(os.path.basename(fname))[0]
            jobs.append(Job(command=[sys.executable, fname], name=name))
        # soma-workflow expects (upstream_job, downstream_job) pairs.
        for key, values in list(dependencies.items()):
            for val in values:
                soma_deps.append((jobs[val], jobs[key]))

        wf = Workflow(jobs, soma_deps)
        logger.info('serializing workflow')
        Helper.serialize('workflow', wf)
        controller = WorkflowController()
        logger.info('submitting workflow')
        wf_id = controller.submit_workflow(wf)
        # Block until every job of the submitted workflow has finished.
        Helper.wait_workflow(wf_id, controller)
Example #2
0
    def _submit_graph(self, pyfiles, dependencies, nodes):
        """Submit one soma-workflow job per generated node script.

        Every path in ``pyfiles`` is turned into a Job run with the current
        Python interpreter, and ``dependencies`` (job index -> indices of
        prerequisite jobs) is translated into soma-workflow (parent, child)
        job pairs before submission.
        """
        jobs = [
            Job(command=[sys.executable, script],
                name=os.path.splitext(os.path.split(script)[1])[0])
            for script in pyfiles
        ]
        soma_deps = [
            (jobs[parent], jobs[child])
            for child, parents in list(dependencies.items())
            for parent in parents
        ]

        wf = Workflow(jobs, soma_deps)
        logger.info('serializing workflow')
        Helper.serialize('workflow', wf)
        controller = WorkflowController()
        logger.info('submitting workflow')
        wf_id = controller.submit_workflow(wf)
        Helper.wait_workflow(wf_id, controller)
Example #3
0
    def run(self, **Xy):
        '''Run soma-workflow without gui

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="trasform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        # soma-workflow is an optional dependency: import it lazily and fail
        # with a readable message on both streams before raising.
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            # Fixed: the original message was missing spaces between the
            # concatenated fragments and misspelled "workflow".
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-workflow "\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        # Work inside a fresh temporary directory; the previous working
        # directory is restored before returning.
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        # Declare the whole temp directory as a FileTransfer so that
        # soma-workflow ships it to the computing resource and back.
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        np.savez(os.path.join(tmp_work_dir_path,
                 SomaWorkflowEngine.dataset_relative_path), **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)

        ## Subtree job allocation on disk
        ## ==============================
        # Split the tree into num_processes chunks; each chunk's node keys
        # are written to a file consumed by one epac_mapper job.
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        # One Job per keys file, all running inside the transferred
        # working directory.
        jobs = [Job(command=[u"epac_mapper",
                         u'--datasets', '"%s"' %
                         (SomaWorkflowEngine.dataset_relative_path),
                         u'--keysfile', '"%s"' %
                         (nodesfile)],
                referenced_input_files=[ft_working_directory],
                referenced_output_files=[ft_working_directory],
                name="epac_job_key=%s" % (nodesfile),
                working_directory=ft_working_directory)
                for nodesfile in keysfile_list]
        soma_workflow = Workflow(jobs=jobs)
        # Default to the local machine when no resource id was given.
        # (Empty string is falsy, so the extra == "" test was redundant.)
        if not self.resource_id:
            self.resource_id = socket.gethostname()
        controller = WorkflowController(self.resource_id,
                                        self.login,
                                        self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow")
        Helper.transfer_input_files(wf_id, controller)
        # Blocks until every job has finished, then fetch the outputs.
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)
        if self.remove_finished_wf:
            controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        # Reload the tree updated by the jobs, restore cwd, clean up.
        self.tree_root = store.load()
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
            shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
Example #4
0
    def run(self, **Xy):
        '''Run soma-workflow without gui

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="trasform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        # soma-workflow is an optional dependency: import lazily and abort
        # with a message on both streams if it is missing.
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            # NOTE(review): message fragments concatenate without spaces and
            # misspell "workflow" — produces "soma-worklowon your computer".
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-worklow"\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        # Work inside a fresh temporary directory; cwd restored at the end.
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        # Declare the whole temp directory as a FileTransfer so soma-workflow
        # ships it to the computing resource and back.
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        np.savez(
            os.path.join(tmp_work_dir_path,
                         SomaWorkflowEngine.dataset_relative_path), **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)

        ## Subtree job allocation on disk
        ## ==============================
        # Split the tree into num_processes chunks; each chunk's node keys
        # are written to a file consumed by one epac_mapper job.
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path, nodesinput_list)
        ## Build soma-workflow
        ## ===================
        # One Job per keys file, all running in the transferred directory.
        jobs = [
            Job(command=[
                u"epac_mapper", u'--datasets',
                '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                u'--keysfile',
                '"%s"' % (nodesfile)
            ],
                referenced_input_files=[ft_working_directory],
                referenced_output_files=[ft_working_directory],
                name="epac_job_key=%s" % (nodesfile),
                working_directory=ft_working_directory)
            for nodesfile in keysfile_list
        ]
        soma_workflow = Workflow(jobs=jobs)
        # Default to the local machine when no resource id was given.
        if not self.resource_id or self.resource_id == "":
            self.resource_id = socket.gethostname()
        controller = WorkflowController(self.resource_id, self.login, self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow")
        Helper.transfer_input_files(wf_id, controller)
        # Blocks until every job has finished, then fetch the outputs.
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)
        # Unlike the flag-guarded variant of this method, this version
        # always deletes the finished workflow from the controller.
        controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        # Reload the tree updated by the jobs, restore cwd, clean up.
        self.tree_root = store.load()
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path):
            shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
Example #5
0
# Example script: copy a file that lives on the computing resource side
# using a SharedResourcePath input and a FileTransfer output.
from soma.workflow.client import Job, Workflow, WorkflowController, SharedResourcePath, FileTransfer

# SharedResourcePath creation for the input file.
# The input file is read directly in the data directory located on the
# the computing resource side.
myfile = SharedResourcePath(relative_path="myfile",
                            namespace="MyApp",
                            uuid="my_example_dir")

# FileTransfer creation for the output file.
# That way the output file will not be written in the data directory
# located on the computing resource file system.
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# Job and Workflow creation.
# The SharedResourcePath/FileTransfer objects in the command are replaced
# by concrete paths on the computing resource at execution time.
copy_job = Job(command=["cp", myfile, copy_of_myfile], 
               name="copy",
               referenced_input_files=[],
               referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job], 
                    dependencies=[])

# workflow submission
# NOTE(review): `login` and `password` are assumed to be defined earlier
# in the full example — not visible in this excerpt.
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="shared resource path example")
# Example script: copy a client-side file via FileTransfer objects for
# both the input and the output, then retrieve the result.
from soma.workflow.client import Helper

# FileTransfer creation for input files
myfile = FileTransfer(is_input=True,
                    client_path="/tmp/soma_workflow_examples/myfile",
                    name="myfile")

# FileTransfer creation for output files
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# Job and Workflow
copy_job = Job(command=["cp", myfile, copy_of_myfile],
              name="copy",
              referenced_input_files=[myfile],
              referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job],
                    dependencies=[])

# submit the workflow
controller = WorkflowController("jinpeng-Latitude-E6530", "jinpeng")

wf_id = controller.submit_workflow(workflow=workflow,
                          name="simple transfer")

# Push inputs to the resource, wait for completion, pull outputs back,
# then remove the finished workflow from the controller.
Helper.transfer_input_files(wf_id, controller)
Helper.wait_workflow(wf_id, controller)
Helper.transfer_output_files(wf_id, controller)
controller.delete_workflow(wf_id)
# NOTE(review): this excerpt starts mid-example — `f` is a file handle
# opened in lines omitted from this chunk.
f.write("Content of my file2 \n")
f.close()

# Creation of the FileTransfer object to transfer the working directory
my_working_directory = FileTransfer(is_input=True,
                                    client_path="/tmp/my_working_directory",
                                    name="working directory")

# Jobs and Workflow: both jobs run inside the transferred working
# directory, so their commands use paths relative to it.
job1 = Job(command=["cp", "myfile1" , "copy_of_myfile1"], 
           name="job1",
           referenced_input_files=[my_working_directory],
           referenced_output_files=[my_working_directory],
           working_directory=my_working_directory)

job2 = Job(command=["cp", "myfile2" , "copy_of_myfile2"], 
           name="job2",
           referenced_input_files=[my_working_directory],
           referenced_output_files=[my_working_directory],
           working_directory=my_working_directory)

workflow = Workflow(jobs=[job1, job2], 
                    dependencies=[])

# Submit the workflow
# NOTE(review): Python 2 print statement — this example predates Python 3;
# `getpass` and `login` are assumed imported/defined in omitted lines.
print "password? "
password = getpass.getpass()
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="working directory transfer example")
Example #8
0
# Example script: a diamond-shaped dependency graph, then a fan-out of
# thirty jobs grouped under a Group for GUI display.
from soma.workflow.client import Job, Workflow, WorkflowController

job_1 = Job(command=["sleep", "60"], name="job 1")
job_2 = Job(command=["sleep", "60"], name="job 2")
job_3 = Job(command=["sleep", "60"], name="job 3")
job_4 = Job(command=["sleep", "60"], name="job 4")

jobs = [job_1, job_2, job_3, job_4]
# Each pair is (parent, child): job_1 fans out to 2 and 3, which both
# feed job_4 (a diamond).
dependencies = [(job_1, job_2), 
                (job_1, job_3), 
                (job_2, job_4), 
                (job_3, job_4)]

workflow = Workflow(jobs=jobs, 
                    dependencies=dependencies)


# NOTE(review): `login` and `password` are assumed to be defined in
# lines omitted from this excerpt.
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="simple example")
# Second part of the example: 30 parallel jobs bracketed by a first and
# a last job, reusing the `jobs` and `dependencies` lists from above.
group_elements = []

first_job = Job(command=["sleep", "10"], name="first job")
last_job = Job(command=["sleep", "10"], name="last job")

jobs.append(first_job)
jobs.append(last_job)

for i in range(0, 30):
  job = Job(command=["sleep", "60"], name="job " + repr(i))

  jobs.append(job)

  # Every middle job runs after first_job and before last_job.
  dependencies.append((first_job, job))
  dependencies.append((job, last_job))

  group_elements.append(job)
  

# NOTE(review): `Group` is not imported in this excerpt — presumably
# imported from soma.workflow.client in omitted lines.
thirty_jobs_group = Group(elements=group_elements,
                          name="my 30 jobs")

# root_group controls how the workflow is displayed (jobs 1..30 are
# collapsed under the group between the first and last job).
workflow = Workflow(jobs=jobs,
                    dependencies= dependencies,
                    root_group=[first_job, thirty_jobs_group, last_job])

controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="Simple workflow with group")