def export_jobs(tree_root, num_processes, dir_path): if not os.path.exists(dir_path): os.makedirs(dir_path) node_input = NodesInput(tree_root.get_key()) split_node_input = SplitNodesInput(tree_root, num_processes=num_processes) nodesinput_list = split_node_input.split(node_input) return save_job_list(dir_path, nodesinput_list)
def run(self, **Xy): """Run soma-workflow without gui Example ------- >>> from sklearn import datasets >>> from epac.map_reduce.engine import SomaWorkflowEngine >>> from epac.tests.wfexamples2test import WFExample2 >>> ## Build dataset >>> ## ============= >>> X, y = datasets.make_classification(n_samples=10, ... n_features=20, ... n_informative=5, ... random_state=1) >>> Xy = {'X':X, 'y':y} >>> ## Build epac tree >>> ## =============== >>> tree_root_node = WFExample2().get_workflow() >>> ## Build SomaWorkflowEngine and run function for each node >>> ## ======================================================= >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node, ... function_name="transform", ... num_processes=3, remove_finished_wf=False) >>> tree_root_node = sfw_engine.run(**Xy) light mode >>> ## Run reduce process >>> ## ================== >>> tree_root_node.reduce() ResultSet( [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6 0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0. 0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0. 0.5], 'y/test/score_precision/pval': [ 0.5 0. ], 'y/test/score_precision': [ 0.6 0.6], 'y/test/score_recall': [ 0.6 0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}, {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6 0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0. 0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0. 0.5], 'y/test/score_precision/pval': [ 0.5 0. ], 'y/test/score_precision': [ 0.6 0.6], 'y/test/score_recall': [ 0.6 0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}]) """ try: from soma_workflow.client import Job, Workflow from soma_workflow.client import Helper, FileTransfer from soma_workflow.client import WorkflowController except ImportError: errmsg = ( "No soma-workflow is found. " "Please verify your soma-worklow" "on your computer (e.g. PYTHONPATH) \n" ) sys.stderr.write(errmsg) sys.stdout.write(errmsg) raise NoSomaWFError tmp_work_dir_path = tempfile.mkdtemp() cur_work_dir = os.getcwd() os.chdir(tmp_work_dir_path) is_run_local = False if not self.resource_id or self.resource_id == "": self.resource_id = socket.gethostname() is_run_local = True # print "is_run_local=", is_run_local if not is_run_local: ft_working_directory = FileTransfer(is_input=True, client_path=tmp_work_dir_path, name="working directory") else: ft_working_directory = tmp_work_dir_path ## Save the database and tree to working directory ## =============================================== # np.savez(os.path.join(tmp_work_dir_path, # SomaWorkflowEngine.dataset_relative_path), **Xy) save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy) store = StoreFs(dirpath=os.path.join(tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path)) self.tree_root.save_tree(store=store) ## Subtree job allocation on disk ## ============================== node_input = NodesInput(self.tree_root.get_key()) split_node_input = SplitNodesInput(self.tree_root, num_processes=self.num_processes) nodesinput_list = split_node_input.split(node_input) keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list) ## Build soma-workflow ## =================== jobs = self._create_jobs(keysfile_list, is_run_local, ft_working_directory) soma_workflow = Workflow(jobs=jobs) controller = WorkflowController(self.resource_id, self.login, self.pw) ## run soma-workflow ## ================= wf_id = controller.submit_workflow(workflow=soma_workflow, name="epac workflow", queue=self.queue) Helper.transfer_input_files(wf_id, controller) Helper.wait_workflow(wf_id, controller) Helper.transfer_output_files(wf_id, controller) self.engine_info = self.get_engine_info(controller, wf_id) if self.remove_finished_wf: controller.delete_workflow(wf_id) ## read result tree ## ================ self.tree_root = store.load() os.chdir(cur_work_dir) if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree: shutil.rmtree(tmp_work_dir_path) return self.tree_root
def run(self, **Xy): '''Run soma-workflow without gui Example ------- >>> from sklearn import datasets >>> from epac.map_reduce.engine import SomaWorkflowEngine >>> from epac.tests.wfexamples2test import WFExample2 >>> ## Build dataset >>> ## ============= >>> X, y = datasets.make_classification(n_samples=10, ... n_features=20, ... n_informative=5, ... random_state=1) >>> Xy = {'X':X, 'y':y} >>> ## Build epac tree >>> ## =============== >>> tree_root_node = WFExample2().get_workflow() >>> ## Build SomaWorkflowEngine and run function for each node >>> ## ======================================================= >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node, ... function_name="transform", ... num_processes=3, remove_finished_wf=False) >>> tree_root_node = sfw_engine.run(**Xy) light mode >>> ## Run reduce process >>> ## ================== >>> tree_root_node.reduce() ResultSet( [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6 0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0. 0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0. 0.5], 'y/test/score_precision/pval': [ 0.5 0. ], 'y/test/score_precision': [ 0.6 0.6], 'y/test/score_recall': [ 0.6 0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}, {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6 0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0. 0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0. 0.5], 'y/test/score_precision/pval': [ 0.5 0. ], 'y/test/score_precision': [ 0.6 0.6], 'y/test/score_recall': [ 0.6 0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}]) ''' try: from soma_workflow.client import Job, Workflow from soma_workflow.client import Helper, FileTransfer from soma_workflow.client import WorkflowController except ImportError: errmsg = "No soma-workflow is found. "\ "Please verify your soma-worklow"\ "on your computer (e.g. PYTHONPATH) \n" sys.stderr.write(errmsg) sys.stdout.write(errmsg) raise NoSomaWFError tmp_work_dir_path = tempfile.mkdtemp() cur_work_dir = os.getcwd() os.chdir(tmp_work_dir_path) is_run_local = False if not self.resource_id or self.resource_id == "": self.resource_id = socket.gethostname() is_run_local = True # print "is_run_local=", is_run_local if not is_run_local: ft_working_directory = FileTransfer(is_input=True, client_path=tmp_work_dir_path, name="working directory") else: ft_working_directory = tmp_work_dir_path ## Save the database and tree to working directory ## =============================================== # np.savez(os.path.join(tmp_work_dir_path, # SomaWorkflowEngine.dataset_relative_path), **Xy) save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy) store = StoreFs(dirpath=os.path.join( tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path)) self.tree_root.save_tree(store=store) ## Subtree job allocation on disk ## ============================== node_input = NodesInput(self.tree_root.get_key()) split_node_input = SplitNodesInput(self.tree_root, num_processes=self.num_processes) nodesinput_list = split_node_input.split(node_input) keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list) ## Build soma-workflow ## =================== jobs = self._create_jobs(keysfile_list, is_run_local, ft_working_directory) soma_workflow = Workflow(jobs=jobs) controller = WorkflowController(self.resource_id, self.login, self.pw) ## run soma-workflow ## ================= wf_id = controller.submit_workflow(workflow=soma_workflow, name="epac workflow", queue=self.queue) Helper.transfer_input_files(wf_id, controller) Helper.wait_workflow(wf_id, controller) Helper.transfer_output_files(wf_id, controller) self.engine_info = self.get_engine_info(controller, wf_id) if self.remove_finished_wf: controller.delete_workflow(wf_id) ## read result tree ## ================ self.tree_root = store.load() os.chdir(cur_work_dir) if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree: shutil.rmtree(tmp_work_dir_path) return self.tree_root