Exemple #1
0
 def export(self, workflow_dir, num_processes):
     '''
     Export the map and reduce commands of the workflow as bash jobs.

     The tree is loaded from ``self.epac_tree_dir_path`` and split into
     keys files by ``export_jobs``.  One ``epac_mapper`` command is
     built per keys file, followed by a single ``epac_reducer``
     command; both lists are handed to ``export_bash_jobs`` together
     with the path ``<workflow_dir>/bash_jobs.sh``.

     Parameters
     ----------
     workflow_dir: string
         the directory to export the workflow to; it is created when
         it does not exist yet
     num_processes: integer
         the number of processes you want to run (forwarded to
         ``export_jobs``)
     '''
     self.workflow_dir = workflow_dir
     if not os.path.exists(self.workflow_dir):
         os.makedirs(self.workflow_dir)
     root = load_tree(self.epac_tree_dir_path)
     keysfiles = export_jobs(root, num_processes, workflow_dir)
     # One mapper command line per keys file returned by export_jobs.
     map_cmds = [
         ["epac_mapper",
          "--datasets", self.dataset_dir_path,
          "--keysfile", os.path.join(workflow_dir, keysfile),
          "--treedir", self.epac_tree_dir_path]
         for keysfile in keysfiles
     ]
     # A single reducer command line is exported after the mappers.
     reduce_cmds = [
         ["epac_reducer",
          "--treedir", self.epac_tree_dir_path,
          "--outdir", self.out_dir_path],
     ]
     bash_script_path = os.path.join(workflow_dir, "bash_jobs.sh")
     export_bash_jobs(bash_script_path, map_cmds, reduce_cmds)
Exemple #2
0
    def export(self, workflow_dir, num_processes):
        '''
        Build a soma-workflow map/reduce workflow and serialize it.

        The tree is loaded from ``self.epac_tree_dir_path`` and split
        into keys files by ``export_jobs``.  One ``epac_mapper`` job is
        created per keys file and a single ``epac_reducer`` job is made
        dependent on all of them.  The resulting workflow is passed to
        ``Helper.serialize`` with the path ``<workflow_dir>/soma_workflow``.

        Parameters
        ----------
        workflow_dir: string
            the directory to export the workflow to; it is created when
            it does not exist yet
        num_processes: integer
            the number of processes you want to run (forwarded to
            ``export_jobs``)

        Raises
        ------
        NoSomaWFError
            if ``soma_workflow.client`` cannot be imported
        '''
        try:
            from soma_workflow.client import Job
            from soma_workflow.client import Group
            from soma_workflow.client import Workflow
            from soma_workflow.client import Helper
        except ImportError:
            # The literals are concatenated as-is, so each fragment must
            # carry its own trailing space.
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-workflow "\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError

        self.workflow_dir = workflow_dir
        soma_workflow_file = os.path.join(self.workflow_dir, "soma_workflow")
        if not os.path.exists(self.workflow_dir):
            os.makedirs(self.workflow_dir)
        tree_root = load_tree(self.epac_tree_dir_path)
        keysfile_list = export_jobs(tree_root, num_processes, workflow_dir)
        # Building mapper tasks: one job per keys file
        map_jobs = []
        for keysfile in keysfile_list:
            map_cmd = ["epac_mapper",
                       "--datasets", self.dataset_dir_path,
                       "--keysfile", os.path.join(workflow_dir, keysfile),
                       "--treedir", self.epac_tree_dir_path]
            map_jobs.append(Job(command=map_cmd,
                                name="map_step",
                                referenced_input_files=[],
                                referenced_output_files=[]))
        group_map_jobs = Group(elements=map_jobs, name="all map jobs")
        # Building reduce task
        reduce_cmd = ["epac_reducer",
                      "--treedir", self.epac_tree_dir_path,
                      "--outdir", self.out_dir_path]
        reduce_job = Job(command=reduce_cmd,
                         name="reduce_step",
                         referenced_input_files=[],
                         referenced_output_files=[])
        # Every (map_job, reduce_job) pair declares a dependency between
        # the two jobs, so the reducer is tied to all mappers.
        dependencies = [(map_job, reduce_job) for map_job in map_jobs]
        jobs = map_jobs + [reduce_job]
        # Build workflow and save into disk
        workflow = Workflow(jobs=jobs,
                            dependencies=dependencies,
                            root_group=[group_map_jobs, reduce_job])
        Helper.serialize(soma_workflow_file, workflow)