def _submit_graph(self, pyfiles, dependencies, nodes):
    jobs = []
    soma_deps = []
    # One job per python script, named after the file's basename.
    for fname in pyfiles:
        name = os.path.splitext(os.path.split(fname)[1])[0]
        jobs.append(Job(command=[sys.executable, fname], name=name))
    # dependencies maps a job's index to the indices of the jobs it
    # must wait for; soma-workflow expects (parent, child) pairs.
    for key, values in list(dependencies.items()):
        for val in values:
            soma_deps.append((jobs[val], jobs[key]))
    wf = Workflow(jobs, soma_deps)
    logger.info('serializing workflow')
    Helper.serialize('workflow', wf)
    controller = WorkflowController()
    logger.info('submitting workflow')
    wf_id = controller.submit_workflow(wf)
    Helper.wait_workflow(wf_id, controller)
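A hypothetical call sketch: `pyfiles` holds one script per job and `dependencies` maps a job's index to the indices of the jobs it depends on. The `engine` instance and the `step_*.py` file names are placeholders; note `nodes` is accepted but unused by the method body.

# Hypothetical example: step_1.py waits for step_0.py; the pairing
# (jobs[val], jobs[key]) above turns {1: [0]} into (job_0, job_1).
pyfiles = ["step_0.py", "step_1.py"]
dependencies = {1: [0]}
engine._submit_graph(pyfiles, dependencies, nodes=None)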
def run(self, **Xy):
    '''Run soma-workflow without gui

    Example
    -------

    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X': X, 'y': y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3)
    >>> tree_root_node = sfw_engine.run(**Xy)
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1),
      'y/test/score_recall_mean/pval': [ 0.],
      'y/test/score_recall/pval': [ 0. 0.],
      'y/test/score_accuray': 0.8,
      'y/test/score_f1/pval': [ 0. 0.],
      'y/test/score_precision/pval': [ 0. 0.],
      'y/test/score_precision': [ 0.8 0.8],
      'y/test/score_recall': [ 0.8 0.8],
      'y/test/score_f1': [ 0.8 0.8],
      'y/test/score_recall_mean': 0.8,
      'y/test/score_accuray/pval': [ 0.]},
     {'key': SelectKBest/SVC(C=3),
      'y/test/score_recall_mean/pval': [ 0.],
      'y/test/score_recall/pval': [ 0. 0.],
      'y/test/score_accuray': 0.8,
      'y/test/score_f1/pval': [ 0. 0.],
      'y/test/score_precision/pval': [ 0. 0.],
      'y/test/score_precision': [ 0.8 0.8],
      'y/test/score_recall': [ 0.8 0.8],
      'y/test/score_f1': [ 0.8 0.8],
      'y/test/score_recall_mean': 0.8,
      'y/test/score_accuray/pval': [ 0.]}])
    '''
    try:
        from soma.workflow.client import Job, Workflow
        from soma.workflow.client import Helper, FileTransfer
        from soma.workflow.client import WorkflowController
    except ImportError:
        errmsg = "Soma-workflow was not found. "\
                 "Please verify your soma-workflow "\
                 "installation (e.g. PYTHONPATH).\n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    ft_working_directory = FileTransfer(is_input=True,
                                        client_path=tmp_work_dir_path,
                                        name="working directory")
    ## Save the database and tree to working directory
    ## ===============================================
    np.savez(os.path.join(tmp_work_dir_path,
             SomaWorkflowEngine.dataset_relative_path), **Xy)
    store = StoreFs(dirpath=os.path.join(
        tmp_work_dir_path,
        SomaWorkflowEngine.tree_root_relative_path))
    self.tree_root.save_tree(store=store)
    ## Subtree job allocation on disk
    ## ==============================
    node_input = NodesInput(self.tree_root.get_key())
    split_node_input = SplitNodesInput(self.tree_root,
                                       num_processes=self.num_processes)
    nodesinput_list = split_node_input.split(node_input)
    keysfile_list = self._save_job_list(tmp_work_dir_path,
                                        nodesinput_list)
    ## Build soma-workflow
    ## ===================
    jobs = [Job(command=[u"epac_mapper",
                         u'--datasets', '"%s"' %
                         (SomaWorkflowEngine.dataset_relative_path),
                         u'--keysfile', '"%s"' % (nodesfile)],
                referenced_input_files=[ft_working_directory],
                referenced_output_files=[ft_working_directory],
                name="epac_job_key=%s" % (nodesfile),
                working_directory=ft_working_directory)
            for nodesfile in keysfile_list]
    soma_workflow = Workflow(jobs=jobs)
    if not self.resource_id or self.resource_id == "":
        self.resource_id = socket.gethostname()
    controller = WorkflowController(self.resource_id,
                                    self.login,
                                    self.pw)
    ## Run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow")
    Helper.transfer_input_files(wf_id, controller)
    Helper.wait_workflow(wf_id, controller)
    Helper.transfer_output_files(wf_id, controller)
    if self.remove_finished_wf:
        controller.delete_workflow(wf_id)
    ## Read the result tree
    ## ====================
    self.tree_root = store.load()
    os.chdir(cur_work_dir)
    if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
        shutil.rmtree(tmp_work_dir_path)
    return self.tree_root
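If the workflow hangs or some mapper jobs fail, the controller can be polled instead of blocking in Helper.wait_workflow. A minimal sketch, assuming `controller` and `wf_id` as created in run() above, and assuming soma-workflow's standard workflow_status / Helper.list_failed_jobs client helpers:

# Sketch: poll the workflow status, then list any failed jobs.
import time
from soma.workflow import constants
from soma.workflow.client import Helper

while controller.workflow_status(wf_id) != constants.WORKFLOW_DONE:
    time.sleep(10)
failed_jobs = Helper.list_failed_jobs(wf_id, controller)
if failed_jobs:
    print("failed jobs: %s" % failed_jobs)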
from soma.workflow.client import Job, Workflow, WorkflowController, \
                                 SharedResourcePath, FileTransfer

# SharedResourcePath creation for the input file.
# The input file is read directly from the data directory located on
# the computing resource side.
myfile = SharedResourcePath(relative_path="myfile",
                            namespace="MyApp",
                            uuid="my_example_dir")

# FileTransfer creation for the output file.
# That way the output file will not be written in the data directory
# located on the computing resource file system.
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# Job and Workflow creation
copy_job = Job(command=["cp", myfile, copy_of_myfile],
               name="copy",
               referenced_input_files=[],
               referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job],
                    dependencies=[])

# workflow submission
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="shared resource path example")
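The example declares an output FileTransfer but never brings the copy back to the client. A minimal follow-up sketch, assuming the submission above is changed to capture the returned workflow id, using only the Helper calls shown in the transfer example below:

# Sketch: wait for the copy job, then pull copy_of_myfile back to
# /tmp/soma_workflow_examples on the client side.
from soma.workflow.client import Helper

wf_id = controller.submit_workflow(workflow=workflow,
                                   name="shared resource path example")
Helper.wait_workflow(wf_id, controller)
Helper.transfer_output_files(wf_id, controller)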
from soma.workflow.client import Job, Workflow, WorkflowController, \
                                 FileTransfer, Helper

# FileTransfer creation for input files
myfile = FileTransfer(is_input=True,
                      client_path="/tmp/soma_workflow_examples/myfile",
                      name="myfile")

# FileTransfer creation for output files
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# Job and Workflow
copy_job = Job(command=["cp", myfile, copy_of_myfile],
               name="copy",
               referenced_input_files=[myfile],
               referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job],
                    dependencies=[])

# submit the workflow
controller = WorkflowController("jinpeng-Latitude-E6530", "jinpeng")

wf_id = controller.submit_workflow(workflow=workflow,
                                   name="simple transfer")

Helper.transfer_input_files(wf_id, controller)
Helper.wait_workflow(wf_id, controller)
Helper.transfer_output_files(wf_id, controller)
controller.delete_workflow(wf_id)
f.write("Content of my file2 \n") f.close() # Creation of the FileTransfer object to transfer the working directory my_working_directory = FileTransfer(is_input=True, client_path="/tmp/my_working_directory", name="working directory") # Jobs and Workflow job1 = Job(command=["cp", "myfile1" , "copy_of_myfile1"], name="job1", referenced_input_files=[my_working_directory], referenced_output_files=[my_working_directory], working_directory=my_working_directory) job2 = Job(command=["cp", "myfile2" , "copy_of_myfile2"], name="job2", referenced_input_files=[my_working_directory], referenced_output_files=[my_working_directory], working_directory=my_working_directory) workflow = Workflow(jobs=[job1, job2], dependencies=[]) # Submit the workflow print "password? " password = getpass.getpass() controller = WorkflowController("DSV_cluster", login, password) controller.submit_workflow(workflow=workflow, name="working directory transfer example")
from soma.workflow.client import Job, Workflow, WorkflowController

job_1 = Job(command=["sleep", "60"], name="job 1")
job_2 = Job(command=["sleep", "60"], name="job 2")
job_3 = Job(command=["sleep", "60"], name="job 3")
job_4 = Job(command=["sleep", "60"], name="job 4")

jobs = [job_1, job_2, job_3, job_4]
# Each (parent, child) pair means child will not start before parent
# has finished: a diamond job_1 -> {job_2, job_3} -> job_4.
dependencies = [(job_1, job_2),
                (job_1, job_3),
                (job_2, job_4),
                (job_3, job_4)]

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies)

# login/password: your credentials on the "DSV_cluster" resource
controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="simple example")
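submit_workflow returns immediately; a small sketch, assuming one wants to block until the whole diamond has completed (Helper as in the transfer examples above):

# Sketch: capture the workflow id and wait for all four jobs.
from soma.workflow.client import Helper

wf_id = controller.submit_workflow(workflow=workflow,
                                   name="simple example")
Helper.wait_workflow(wf_id, controller)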
from soma.workflow.client import Job, Workflow, WorkflowController, \
                                 FileTransfer

#f = open("/tmp/soma_workflow_examples/myfile", "wb")
#f.write("Content of my file \n")
#f.close()

# FileTransfer creation for input files
myfile = FileTransfer(is_input=True,
                      client_path="/tmp/soma_workflow_examples/myfile",
                      name="myfile")

# FileTransfer creation for output files
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# Job and Workflow
copy_job = Job(command=["cp", myfile, copy_of_myfile],
               name="copy",
               referenced_input_files=[myfile],
               referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job],
                    dependencies=[])

controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="simple transfer")
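Declaring the FileTransfer objects is not enough on its own: as in the Helper-based transfer example above, the files only move once Helper.transfer_input_files and Helper.transfer_output_files are called with the workflow id returned by submit_workflow.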
from soma.workflow.client import Job, Group, Workflow, WorkflowController

jobs = []
dependencies = []
group_elements = []

first_job = Job(command=["sleep", "10"], name="first job")
last_job = Job(command=["sleep", "10"], name="last job")

jobs.append(first_job)
jobs.append(last_job)

for i in range(0, 30):
    job = Job(command=["sleep", "60"], name="job " + repr(i))
    jobs.append(job)
    dependencies.append((first_job, job))
    dependencies.append((job, last_job))
    group_elements.append(job)

thirty_jobs_group = Group(elements=group_elements,
                          name="my 30 jobs")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[first_job, thirty_jobs_group, last_job])

controller = WorkflowController("DSV_cluster", login, password)

controller.submit_workflow(workflow=workflow,
                           name="Simple workflow with group")
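Note that the Group and root_group only organize how the workflow is displayed in the monitoring interface; execution order is still governed entirely by the dependencies, which here run first_job before the thirty grouped jobs and last_job after all of them.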