Example #1
    def submit_job4(self, time=10):
        self.file4_tr = self.wf_ctrl.register_transfer(
            FileTransfer(False,
                         os.path.join(self.output_dir, "file4"),
                         self.tr_timeout))
        script4_tr = self.wf_ctrl.register_transfer(
            FileTransfer(True,
                         os.path.join(self.complete_path, "job4.py"),
                         self.tr_timeout))
        stdin4_tr = self.wf_ctrl.register_transfer(
            FileTransfer(True,
                         os.path.join(self.complete_path, "stdin4"),
                         self.tr_timeout))
        self.wf_ctrl.transfer_files(script4_tr.engine_path)
        self.wf_ctrl.transfer_files(stdin4_tr.engine_path)
        job4_id = self.wf_ctrl.submit_job(Job(
            command=[self.python, script4_tr, self.file2_tr,
                     self.file3_tr, self.file4_tr, repr(time)],
            referenced_input_files=[self.file2_tr, self.file3_tr,
                                    script4_tr, stdin4_tr],
            referenced_output_files=[self.file4_tr],
            stdin=stdin4_tr,
            join_stderrout=False,
            disposal_timeout=self.jobs_timeout,
            name="job4 with transfers"))

        return (job4_id, [self.file4_tr.engine_path], None)
 def job_sleep(self, period):
     complete_path = os.path.join(self.examples_dir, "complete")
     transfer = FileTransfer(
         True, os.path.join(complete_path, "file0"),
         168, "file0")
     job = Job(["python", self.tr_sleep_script, repr(period)],
               [self.tr_sleep_script, transfer], [],
               None, False, 168, "sleep " + repr(period) + " s")
     return job
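A minimal usage sketch, assuming wf_examples is an instance of the examples class that defines job_sleep: the returned jobs can be grouped into a Workflow in the same way as the other examples on this page.

from soma_workflow.client import Workflow

# Hypothetical usage; wf_examples is assumed to be an instance of the
# examples class that defines job_sleep above.
sleep_jobs = [wf_examples.job_sleep(period) for period in (10, 30, 60)]
sleep_workflow = Workflow(jobs=sleep_jobs, dependencies=[])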
Example #3
 def export_to_gui(self, soma_workflow_dirpath, **Xy):
     '''
     Example
     -------
     see "examples/run_somaworkflow_gui.py" in the epac repository
     '''
     try:
         from soma_workflow.client import Job, Workflow
         from soma_workflow.client import Helper, FileTransfer
     except ImportError:
         errmsg = "No soma-workflow is found. "\
             "Please verify your soma-worklow"\
             "on your computer (e.g. PYTHONPATH) \n"
         sys.stderr.write(errmsg)
         sys.stdout.write(errmsg)
         raise NoSomaWFError
     if not os.path.exists(soma_workflow_dirpath):
         os.makedirs(soma_workflow_dirpath)
     tmp_work_dir_path = soma_workflow_dirpath
     cur_work_dir = os.getcwd()
     os.chdir(tmp_work_dir_path)
     ft_working_directory = FileTransfer(is_input=True,
                                         client_path=tmp_work_dir_path,
                                         name="working directory")
     ## Save the database and tree to working directory
     ## ===============================================
     #        np.savez(os.path.join(tmp_work_dir_path,
     #                 SomaWorkflowEngine.dataset_relative_path), **Xy)
     db_size = estimate_dataset_size(**Xy)
     db_size = int(db_size / (1024 * 1024))  # convert it into mega byte
     save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
     store = StoreFs(dirpath=os.path.join(
         tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
     self.tree_root.save_tree(store=store)
     ## Subtree job allocation on disk
     ## ==============================
     node_input = NodesInput(self.tree_root.get_key())
     split_node_input = SplitNodesInput(self.tree_root,
                                        num_processes=self.num_processes)
     nodesinput_list = split_node_input.split(node_input)
     keysfile_list = self._save_job_list(tmp_work_dir_path, nodesinput_list)
     ## Build soma-workflow
     ## ===================
     jobs = self._create_jobs(keysfile_list,
                              is_run_local=False,
                              ft_working_directory=ft_working_directory)
     soma_workflow = Workflow(jobs=jobs)
     if soma_workflow_dirpath and soma_workflow_dirpath != "":
         out_soma_workflow_file = os.path.join(
             soma_workflow_dirpath,
             SomaWorkflowEngine.open_me_by_soma_workflow_gui)
         Helper.serialize(out_soma_workflow_file, soma_workflow)
     os.chdir(cur_work_dir)
 def shared_function(self,
                     dirname,
                     filename,
                     namespace,
                     uuid,
                     disposal_timeout,
                     is_input,
                     client_paths=None):
     '''Build and return a FileTransfer for the given file.'''
     return FileTransfer(is_input, os.path.join(dirname, filename),
                         disposal_timeout, uuid, client_paths)
Example #5
    def submit_job1(self, time=2):
        self.file11_tr = self.wf_ctrl.register_transfer(
            FileTransfer(is_input=False,
                         client_path=os.path.join(self.output_dir, "file11"),
                         disposal_timeout=self.tr_timeout))
        self.file12_tr = self.wf_ctrl.register_transfer(
            FileTransfer(is_input=False,
                         client_path=os.path.join(self.output_dir, "file12"),
                         disposal_timeout=self.tr_timeout))
        self.file0_tr = self.wf_ctrl.register_transfer(
            FileTransfer(True,
                         os.path.join(self.complete_path, "file0"),
                         self.tr_timeout))
        script1_tr = self.wf_ctrl.register_transfer(
            FileTransfer(True,
                         os.path.join(self.complete_path, "job1.py"),
                         self.tr_timeout))
        stdin1_tr = self.wf_ctrl.register_transfer(
            FileTransfer(True,
                         os.path.join(self.complete_path, "stdin1"),
                         self.tr_timeout))
        self.wf_ctrl.transfer_files(self.file0_tr.engine_path)
        self.wf_ctrl.transfer_files(script1_tr.engine_path)
        self.wf_ctrl.transfer_files(stdin1_tr.engine_path)
        sys.stdout.write("files transfered \n")
        job1_id = self.wf_ctrl.submit_job(Job(
            command=[self.python, script1_tr, self.file0_tr,
                     self.file11_tr, self.file12_tr, repr(time)],
            referenced_input_files=[self.file0_tr, script1_tr,
                                    stdin1_tr],
            referenced_output_files=[self.file11_tr, self.file12_tr],
            stdin=stdin1_tr,
            join_stderrout=False,
            disposal_timeout=self.jobs_timeout,
            name="job1 with transfers"))

        return (job1_id,
                [self.file11_tr.engine_path,
                 self.file12_tr.engine_path],
                None)
Example #6
    def submit_exception_job(self):
        script_tr = self.wf_ctrl.register_transfer(
            FileTransfer(True,
                         os.path.join(self.complete_path, "exception_job.py"),
                         self.tr_timeout))
        self.wf_ctrl.transfer_files(script_tr.engine_path)
        job_id = self.wf_ctrl.submit_job(Job(
            command=[self.python, script_tr],
            referenced_input_files=[script_tr],
            referenced_output_files=[],
            stdin=None,
            join_stderrout=False,
            disposal_timeout=self.jobs_timeout,
            name="job with exception"))

        return (job_id, None, None)
import os
from soma_workflow.client import Job, FileTransfer

# working directory creation (this is not part of the example)
my_working_directory = "/tmp/my_working_directory"
if not os.path.isdir(my_working_directory):
    os.mkdir(my_working_directory)
f = open("/tmp/my_working_directory/myfile1", "wb")
f.write("Content of my file1 \n")
f.close()
f = open("/tmp/my_working_directory/myfile2", "wb")
f.write("Content of my file2 \n")
f.close()

# Creation of the FileTransfer object to transfer the working directory
my_working_directory = FileTransfer(is_input=True,
                                    client_path="/tmp/my_working_directory",
                                    name="working directory")

# Jobs and Workflow
job1 = Job(command=["cp", "myfile1", "copy_of_myfile1"],
           name="job1",
           referenced_input_files=[my_working_directory],
           referenced_output_files=[my_working_directory],
           working_directory=my_working_directory)

job2 = Job(command=["cp", "myfile2", "copy_of_myfile2"],
           name="job2",
           referenced_input_files=[my_working_directory],
           referenced_output_files=[my_working_directory],
           working_directory=my_working_directory)
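A minimal follow-up sketch: the two jobs share the transferred working directory and have no ordering constraint, so they can be grouped into a Workflow with an empty dependency list, as in the other examples on this page.

from soma_workflow.client import Workflow

# The two copy jobs are independent, so the dependency list is empty.
workflow = Workflow(jobs=[job1, job2], dependencies=[])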
Example #8
    def mpi_job_submission(self, node_num):
        '''
        BROKEN
        '''
        # compilation
        source_tr = self.wf_ctrl.register_transfer(FileTransfer(
            True,
            self.examples_dir + "mpi/simple_mpi.c",
            self.tr_timeout))

        self.wf_ctrl.transfer_files(source_tr.engine_path)

        object_tr = self.wf_ctrl.register_transfer(FileTransfer(
            False,
            self.output_dir + "simple_mpi.o",
            self.tr_timeout))
        # Example MPI bin locations:
        #   /volatile/laguitton/sge6-2u5/mpich/mpich-1.2.7/bin/
        #   /opt/mpich/gnu/bin/

        mpibin = self.wf_ctrl.config._config_parser.get(
            self.wf_ctrl._resource_id,
            configuration.OCFG_PARALLEL_ENV_MPI_BIN)
        sys.stdout.write("mpibin = " + mpibin + '\n')

        sys.stdout.write("source_tr.engine_path = " +
                         source_tr.engine_path + "\n")
        sys.stdout.write("object_tr.engine_path = " +
                         object_tr.engine_path + "\n")
        compil1job_id = self.wf_ctrl.submit_job(Job(
            command=[mpibin + "/mpicc", "-c", source_tr, "-o", object_tr],
            referenced_input_files=[source_tr],
            referenced_output_files=[object_tr],
            join_stderrout=False,
            disposal_timeout=self.jobs_timeout,
            name="job compil1 mpi"))

        self.wf_ctrl.wait_job([compil1job_id])

        bin_tr = self.wf_ctrl.register_transfer(FileTransfer(
            True,
            self.output_dir + "simple_mpi",
            self.tr_timeout))
        sys.stdout.write("bin_tr.engine_path= " + bin_tr.engine_path + "\n")

        compil2job_id = self.wf_ctrl.submit_job(Job(
            command=[mpibin + "/mpicc", "-o", bin_tr, object_tr],
            referenced_input_files=[object_tr],
            referenced_output_files=[bin_tr],
            join_stderrout=False,
            disposal_timeout=self.jobs_timeout,
            name="job compil2 mpi"))

        self.wf_ctrl.wait_job([compil2job_id])
        self.wf_ctrl.delete_transfer(object_tr.engine_path)

        # mpi job submission
        script = self.wf_ctrl.register_transfer(FileTransfer(
            True,
            self.examples_dir + "mpi/simple_mpi.sh",
            self.tr_timeout))

        self.wf_ctrl.transfer_files(script.engine_path)

        job_id = self.wf_ctrl.submit_job(Job(
            command=[script, repr(node_num), bin_tr],
            referenced_input_files=[script, bin_tr],
            join_stderrout=False,
            disposal_timeout=self.jobs_timeout,
            name="parallel job mpi",
            parallel_job_info=(configuration.OCFG_PARALLEL_PC_MPI, node_num)))

        self.wf_ctrl.delete_job(compil1job_id)
        self.wf_ctrl.delete_job(compil2job_id)

        return (job_id, [source_tr.engine_path], None)
Example #9
from soma_workflow.client import Job, Workflow, WorkflowController, FileTransfer
# import os

# file creation (it is not a part of the example)
# example_directory = "/tmp/soma_workflow_examples"
# if not os.path.isdir(example_directory):
#     os.mkdir(example_directory)

# f = open("/tmp/soma_workflow_examples/myfile", "wb")
# f.write("Content of my file \n")
# f.close()


# FileTransfer creation for input files
myfile = FileTransfer(is_input=True,
                      client_path="/tmp/soma_workflow_examples/myfile",
                      name="myfile")

# FileTransfer creation for output files
copy_of_myfile = FileTransfer(is_input=False,
                              client_path="/tmp/soma_workflow_examples/copy_of_myfile",
                              name="copy of my file")

# Job and Workflow
copy_job = Job(command=["cp", myfile, copy_of_myfile],
               name="copy",
               referenced_input_files=[myfile],
               referenced_output_files=[copy_of_myfile])

workflow = Workflow(jobs=[copy_job],
                    dependencies=[])
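A hedged follow-up sketch: the output path below is illustrative; Helper.serialize (used the same way in export_to_gui above) writes the workflow to a file that can later be opened in soma_workflow_gui.

from soma_workflow.client import Helper

# Illustrative output path; Helper.serialize writes the workflow to a file
# that soma_workflow_gui can open and submit.
Helper.serialize("/tmp/soma_workflow_examples/copy_workflow", workflow)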
Example #10
    def run(self, **Xy):
        '''Run soma-workflow without gui

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="transform",
        ...                                 num_processes=3,
        ...                                 remove_finished_wf=False)
        >>> tree_root_node = sfw_engine.run(**Xy)
        light mode
        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6], 'y/test/score_recall_mean/pval': [ 0.5], 'y/test/score_recall/pval': [ 0.   0.5], 'y/test/score_accuracy/pval': [ 0.], 'y/test/score_f1/pval': [ 0.   0.5], 'y/test/score_precision/pval': [ 0.5  0. ], 'y/test/score_precision': [ 0.6  0.6], 'y/test/score_recall': [ 0.6  0.6], 'y/test/score_accuracy': 0.6, 'y/test/score_recall_mean': 0.6}])

        '''
        try:
            from soma_workflow.client import Job, Workflow
            from soma_workflow.client import Helper, FileTransfer
            from soma_workflow.client import WorkflowController
        except ImportError:
            errmsg = "No soma-workflow is found. "\
                "Please verify your soma-worklow"\
                "on your computer (e.g. PYTHONPATH) \n"
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError
        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        os.chdir(tmp_work_dir_path)
        is_run_local = False
        if not self.resource_id or self.resource_id == "":
            self.resource_id = socket.gethostname()
            is_run_local = True
        # print "is_run_local=", is_run_local
        if not is_run_local:
            ft_working_directory = FileTransfer(is_input=True,
                                                client_path=tmp_work_dir_path,
                                                name="working directory")
        else:
            ft_working_directory = tmp_work_dir_path

        ## Save the database and tree to working directory
        ## ===============================================
        # np.savez(os.path.join(tmp_work_dir_path,
        # SomaWorkflowEngine.dataset_relative_path), **Xy)
        save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path, SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)

        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(self.tree_root,
                                           num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = save_job_list(tmp_work_dir_path, nodesinput_list)

        ## Build soma-workflow
        ## ===================
        jobs = self._create_jobs(keysfile_list, is_run_local,
                                 ft_working_directory)
        soma_workflow = Workflow(jobs=jobs)

        controller = WorkflowController(self.resource_id, self.login, self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow",
                                           queue=self.queue)
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)

        self.engine_info = self.get_engine_info(controller, wf_id)

        if self.remove_finished_wf:
            controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
            shutil.rmtree(tmp_work_dir_path)
        return self.tree_root
    def __init__(self):
        '''
        The input and output files are temporary files on the computing
        resource; they can be transferred to and from the computing
        resource using the soma-workflow API.
        '''
        super(WorkflowExamplesTransfer, self).__init__()

        # Initialize the dictionaries
        self.tr_file = {}
        self.tr_script = {}
        self.tr_stdin = {}
        self.lo_stdout = {}
        self.lo_out_model_file = {}

        self.lo_in_dir = self.examples_dir
        self.tr_in_dir = FileTransfer(True, self.examples_dir, 168, "in_dir")

        # Complete path
        self.complete_path = os.path.join(self.examples_dir, "complete")
        self.tr_file[0] = FileTransfer(
            True, os.path.join(self.complete_path, "file0"),
            168, "file0")
        self.tr_exceptionJobScript = FileTransfer(
            True, os.path.join(self.complete_path, "exception_job.py"),
            168, "exception_job")
        self.tr_sleep_script = FileTransfer(
            True, os.path.join(self.complete_path, "sleep_job.py"),
            168, "sleep_job")
        self.tr_cmd_check_script = FileTransfer(
            True, os.path.join(self.complete_path, "special_command.py"),
            168, "cmd_check")

        # Models path
        self.models_path = os.path.join(self.complete_path, "output_models")
        self.lo_stdout_exception_model = os.path.join(
            self.models_path, "stdout_exception_job")
        self.lo_stdout_command_remote = os.path.join(
            self.models_path, "stdout_remote_special_command")

        # Special path
        self.special_path = os.path.join(self.examples_dir,
                                         "special_transfers")
        self.tr_img_file = FileTransfer(
            True, os.path.join(self.special_path, "example.img"),
            128, "img_file",
            [os.path.join(self.special_path, "example.img"),
             os.path.join(self.special_path, "example.hdr")])
        self.tr_dir_contents_script = FileTransfer(
            True, os.path.join(self.special_path, "dir_contents.py"),
            168, "dir_contents")
        self.tr_mff_script = FileTransfer(
            True, os.path.join(self.special_path, "multiple_file_format.py"),
            168, "mdd_script")
        self.lo_mff_stdout = os.path.join(
            self.special_path, 'stdout_multiple_file_format')

        # Output path
        self.tr_out_dir = FileTransfer(
            False, os.path.join(self.output_dir, "transfered_dir"),
            168, "out_dir")
        self.tr_img_out_file = FileTransfer(
            False, os.path.join(self.output_dir, "example.img"),
            168, "img_out",
            [os.path.join(self.output_dir, "example.img"),
             os.path.join(self.output_dir, "example.hdr")])
#
        for i in range(1, 5):
            self.tr_script[i] = FileTransfer(
                True, os.path.join(self.complete_path, "job" + str(i) + ".py"),
                168, "job" + str(i) + "_py")
            self.tr_stdin[i] = FileTransfer(
                True, os.path.join(self.complete_path, "stdin" + str(i)),
                168, "stdin" + str(i))
            self.lo_stdout[i] = os.path.join(self.models_path,
                                             "stdout_job" + str(i))

        for i in [11, 12, 2, 3, 4]:
            self.tr_file[i] = FileTransfer(
                False, os.path.join(self.output_dir, "file" + str(i)),
                168, "file" + str(i))
            self.lo_out_model_file[i] = os.path.join(
                self.models_path, "file" + str(i))
Example #12
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster: use somaworkflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2):
    ...     return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs) for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call([
                'python', '-c', cfunc % (fmodule, fname), params_fn, output_fn
            ])
            # print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'pyhrf_exec_pyfunc', fmodule, fname, param_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            if isinstance(o, Exception):
                raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
Example #13
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):

    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':

        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(
                ['python', '-c', cfunc_marshal, params_fn, func_fn, output_fn])
            print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'python', '-c', cfunc, param_file, func_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])

        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
Example #14
import os

from soma_workflow.client import Job, Workflow, WorkflowController, Helper, FileTransfer
from soma_workflow.configuration import Configuration
# from soma_workflow.connection import RemoteConnection

user = '******'
try:
    import pwd
    user = pwd.getpwuid(os.getuid()).pw_name
except Exception:
    pass

controller = WorkflowController("Gabriel", user)

# FileTransfer creation for input files
file1 = FileTransfer(is_input=True,
                     client_path="%s/create_file.py" %
                     Configuration.get_home_dir(),
                     name="script")

file2 = FileTransfer(is_input=True,
                     client_path="%s/output_file" %
                     Configuration.get_home_dir(),
                     name="file created on the server")

# Job and Workflow
run_script = Job(command=["python", file1, file2],
                 name="copy",
                 referenced_input_files=[file1],
                 referenced_output_files=[file2])

workflow = Workflow(jobs=[run_script], dependencies=[])
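A minimal sketch of the remaining steps, following the submit/transfer/wait sequence used in the run() example earlier on this page; the workflow name passed to submit_workflow is illustrative.

# Submit the workflow to the "Gabriel" resource, push the input script,
# wait for completion, then bring back the file produced on the server.
wf_id = controller.submit_workflow(workflow=workflow, name="simple transfer")
Helper.transfer_input_files(wf_id, controller)
Helper.wait_workflow(wf_id, controller)
Helper.transfer_output_files(wf_id, controller)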