def export(self, workflow_dir, num_processes):
    """Export the EPAC tree as a shell script of map/reduce commands.

    Splits the stored EPAC tree into ``num_processes`` key files and
    writes a ``bash_jobs.sh`` script into ``workflow_dir`` containing
    one ``epac_mapper`` command per key file plus a final
    ``epac_reducer`` command.

    Parameters
    ----------
    workflow_dir: string
        the directory to export workflow
    num_processes: integer
        the number of processes you want to run
    """
    self.workflow_dir = workflow_dir
    if not os.path.exists(self.workflow_dir):
        os.makedirs(self.workflow_dir)
    tree_root = load_tree(self.epac_tree_dir_path)
    keysfile_list = export_jobs(tree_root, num_processes, workflow_dir)
    # One mapper command per exported keys file.
    map_cmds = []
    for keysfile in keysfile_list:
        key_path = os.path.join(workflow_dir, keysfile)
        map_cmds.append(["epac_mapper",
                         "--datasets", self.dataset_dir_path,
                         "--keysfile", key_path,
                         "--treedir", self.epac_tree_dir_path])
    # NOTE(review): the reduce command is loop-invariant (it references no
    # per-key state), so a single instance is emitted — confirm that
    # export_bash_jobs does not expect one reduce command per map command.
    reduce_cmds = [["epac_reducer",
                    "--treedir", self.epac_tree_dir_path,
                    "--outdir", self.out_dir_path]]
    filename_bash_jobs = os.path.join(workflow_dir, "bash_jobs.sh")
    export_bash_jobs(filename_bash_jobs, map_cmds, reduce_cmds)
def export(self, workflow_dir, num_processes):
    """Export the EPAC tree as a serialized soma-workflow workflow.

    Splits the stored EPAC tree into ``num_processes`` key files, builds
    one mapper Job per key file plus a single reducer Job that depends
    on all mappers, and serializes the resulting Workflow to the file
    ``soma_workflow`` inside ``workflow_dir``.

    Parameters
    ----------
    workflow_dir: string
        the directory to export workflow
    num_processes: integer
        the number of processes you want to run

    Raises
    ------
    NoSomaWFError
        if the soma-workflow client library cannot be imported.
    """
    try:
        from soma_workflow.client import Job
        from soma_workflow.client import Group
        from soma_workflow.client import Workflow
        from soma_workflow.client import Helper
    except ImportError:
        # Report on both streams so the message is visible whichever one
        # the caller is watching.
        errmsg = "No soma-workflow is found. "\
                 "Please verify your soma-workflow "\
                 "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    self.workflow_dir = workflow_dir
    soma_workflow_file = os.path.join(self.workflow_dir, "soma_workflow")
    if not os.path.exists(self.workflow_dir):
        os.makedirs(self.workflow_dir)
    tree_root = load_tree(self.epac_tree_dir_path)
    keysfile_list = export_jobs(tree_root, num_processes, workflow_dir)
    # Building mapper tasks: one Job per exported keys file.
    map_jobs = []
    for keysfile in keysfile_list:
        key_path = os.path.join(workflow_dir, keysfile)
        map_cmd = ["epac_mapper",
                   "--datasets", self.dataset_dir_path,
                   "--keysfile", key_path,
                   "--treedir", self.epac_tree_dir_path]
        map_jobs.append(Job(command=map_cmd, name="map_step",
                            referenced_input_files=[],
                            referenced_output_files=[]))
    group_map_jobs = Group(elements=map_jobs, name="all map jobs")
    # Building the reduce task; it must wait for every mapper to finish.
    reduce_cmd = ["epac_reducer",
                  "--treedir", self.epac_tree_dir_path,
                  "--outdir", self.out_dir_path]
    reduce_job = Job(command=reduce_cmd, name="reduce_step",
                     referenced_input_files=[],
                     referenced_output_files=[])
    dependencies = [(map_job, reduce_job) for map_job in map_jobs]
    jobs = map_jobs + [reduce_job]
    # Build the workflow and save it to disk.
    workflow = Workflow(jobs=jobs, dependencies=dependencies,
                        root_group=[group_map_jobs, reduce_job])
    Helper.serialize(soma_workflow_file, workflow)