def SimpleJobExample(configuration_item_name, userid, userpw=None):
    """Dummy workflow to test the install.

    Builds a four-job "diamond" workflow (one head job, two parallel
    middle jobs, one tail job) and submits it to the computing resource.

    Parameters
    ----------
    configuration_item_name: str
        the name of the configuration item (ex. "Gabriel")
    userid: str
        user name on the server side
    userpw: str (optional)
        user password to login the server using ssh. If you want to use
        "id_rsa.pub", just leave userpw to None. To copy the public key
        on the server use ssh-copy-id -i name@server.
    """
    # Four identical placeholder jobs; each just sleeps for 5 seconds.
    diamond_jobs = [Job(command=["sleep", "5"], name="job 1"),
                    Job(command=["sleep", "5"], name="job 2"),
                    Job(command=["sleep", "5"], name="job 3"),
                    Job(command=["sleep", "5"], name="job 4")]
    head, left, right, tail = diamond_jobs

    # Diamond dependency graph: head fans out to left/right, which both
    # have to finish before tail starts.
    edges = [(head, left), (head, right), (left, tail), (right, tail)]

    controller = WorkflowController(configuration_item_name, userid, userpw)
    controller.submit_workflow(
        workflow=Workflow(jobs=diamond_jobs, dependencies=edges),
        name="TestConnectionExample")
def setup_connection(cls, resource_id, login, password):
    """Open a workflow controller for *resource_id* and cache the
    connection parameters, timeouts and a JobExamples factory on the
    class for use by the test cases.
    """
    # Remember the credentials so individual tests can re-use them.
    cls.resource_id = resource_id
    cls.login = login
    cls.password = password

    # Timeout values handed to JobExamples below (same values as the
    # original setup; their exact semantics are defined by JobExamples).
    cls.transfer_timeout = -24
    cls.jobs_timeout = 1

    cls.wf_ctrl = WorkflowController(resource_id, login, password)
    cls.job_examples = JobExamples(cls.wf_ctrl, 'python',
                                   cls.transfer_timeout,
                                   cls.jobs_timeout)
# Fan-out / fan-in workflow: "first job" must run before thirty parallel
# jobs, all of which must finish before "last job".
first_job = Job(command=["sleep", "10"], name="first job")
last_job = Job(command=["sleep", "10"], name="last job")
jobs.append(first_job)
jobs.append(last_job)

group_elements = []
for i in range(30):
    job = Job(command=["sleep", "60"], name="job " + repr(i))
    group_elements.append(job)
    jobs.append(job)
    # Wire the fan-out and fan-in edges for this job.
    dependencies.append((first_job, job))
    dependencies.append((job, last_job))

# Display the thirty parallel jobs as a single group in the GUI.
thirty_jobs_group = Group(elements=group_elements,
                          name="my 30 jobs")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[first_job, thirty_jobs_group, last_job])

login = '******'
password = '******'
controller = WorkflowController("DSV_cluster", login, password)
controller.submit_workflow(workflow=workflow,
                           name="Simple workflow with group")
def morphologist_all(t1file, sid, outdir, study="morphologist", waittime=10,
                     somaworkflow=False,
                     spmexec="/i2bm/local/spm8-standalone/run_spm8.sh",
                     spmdir="/i2bm/local/spm8-standalone"):
    """ Performs all the Morphologist steps.

    Steps:

    1- Ensure image orientation and reorient it if needed (Prepare Subject
       for Anatomical Pipeline).
    2- Computation of a brain mask (Brain Mask Segmentation).
    3- Computation of a mask for each hemisphere (Split Brain Mask).
    4- A grey/white classification of each hemisphere to perform "Voxel
       Based Morphometry" (Grey White Classification) and spherical
       triangulation of cortical hemispheres (Grey White Surface).
    5- Spherical triangulation of the external interface of the cortex of
       one or two hemispheres (Get Spherical Hemi Surface).
    6- Computation of a graph representing the cortical fold topography
       (Cortical Fold Graph).
    7- Automatic identification of the cortical sulci (Automatic Sulci
       Recognition), located in the "sulci" toolbox.

    The execution is performed with soma_workflow that has to be installed
    in the bv_env environment.

    To check the workflow submission, use the 'soma_workflow_gui' command.

    If the input 't1file' does not have the expected extension, an
    Exception will be raised.
    If the $outdir/$study/$sid has already been created, an Exception will
    be raised.

    Parameters
    ----------
    t1file: str (mandatory)
        the path to a ".nii.gz" anatomical T1 weighted file.
    sid: str (mandatory)
        a subject identifier.
    outdir: str (mandatory)
        the morphologist output files will be written in
        $outdir/$study/$sid.
    study: str (mandatory)
        the name of the study.
    waittime: float (optional, default 10)
        a delay (in seconds) used to check the workflow status.
    somaworkflow: bool (optional, default False)
        if True use somaworkflow for the execution.
    spmexec: str (optional)
        the path to the standalone SPM execution file.
    spmdir: str (optional)
        the standalone SPM directory.

    Returns
    -------
    wffile: str
        a file containing the submitted workflow.
    wfid: int
        the submitted workflow identifier.
    wfstatus: str
        the submitted workflow status after 'waittime' seconds.
    """
    # Check roughly the input file extension
    if not t1file.endswith(".nii.gz"):
        raise Exception("'{0}' is not a COMPRESSED NIFTI file.".format(
            t1file))

    # Create a configuration for the morphologist study
    study_config = StudyConfig(
        modules=StudyConfig.default_modules + ["FomConfig",
                                               "BrainVISAConfig"])
    study_dict = {
        "name": "morphologist_fom",
        "input_directory": outdir,
        "output_directory": outdir,
        "input_fom": "morphologist-auto-nonoverlap-1.0",
        "output_fom": "morphologist-auto-nonoverlap-1.0",
        "shared_fom": "shared-brainvisa-1.0",
        "spm_directory": spmdir,
        "use_soma_workflow": True,
        "use_fom": True,
        "spm_standalone": True,
        "use_matlab": False,
        "volumes_format": "NIFTI gz",
        "meshes_format": "GIFTI",
        "use_spm": True,
        "spm_exec": spmexec,
        "study_config.somaworkflow_computing_resource": "localhost",
        "somaworkflow_computing_resources_config": {
            "localhost": {
            }
        }
    }
    study_config.set_study_configuration(study_dict)

    # Create the morphologist pipeline
    pipeline = get_process_instance(
        "morphologist.capsul.morphologist.Morphologist")
    morphologist_pipeline = process_with_fom.ProcessWithFom(
        pipeline, study_config)
    # Snapshot the current trait values, then override the attributes the
    # FOM completion needs (center/subject) before generating paths.
    morphologist_pipeline.attributes = dict(
        (trait_name, getattr(morphologist_pipeline, trait_name))
        for trait_name in morphologist_pipeline.user_traits())
    morphologist_pipeline.attributes["center"] = "morphologist"
    morphologist_pipeline.attributes["subject"] = sid
    morphologist_pipeline.create_completion()

    # Create morphologist expected tree
    # ToDo: use ImportT1 from axon
    subjectdir = os.path.join(outdir, study, sid)
    if os.path.isdir(subjectdir):
        raise Exception("Folder '{0}' already created.".format(subjectdir))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "default_analysis", "folds", "3.1", "default_session_auto"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "segmentation", "mesh"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition", "tmp"))

    # Copy T1 file in the morphologist expected location
    destfile = os.path.join(subjectdir, "t1mri", "default_acquisition",
                            sid + ".nii.gz")
    shutil.copy(t1file, destfile)

    # Create source_referential morphologist expected file
    source_referential = {"uuid": str(soma.uuid.Uuid())}
    referential_file = os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration",
        "RawT1-{0}_default_acquisition.referential".format(sid))
    attributes = "attributes = {0}".format(json.dumps(source_referential))
    with open(referential_file, "w") as openfile:
        openfile.write(attributes)

    # Create a workflow from the morphologist pipeline
    workflow = Workflow(name="{0} {1}".format(study, sid),
                        jobs=[])
    workflow.root_group = []

    # Create the workflow
    wf = pipeline_workflow.workflow_from_pipeline(
        morphologist_pipeline.process, study_config=study_config)
    workflow.add_workflow(wf, as_group="{0}_{1}".format(study, sid))
    wffile = os.path.join(subjectdir, "{0}.wf".format(study))
    # NOTE(review): the file handle is never closed and text mode "w" is
    # used for pickling — "wb" inside a `with` block would be safer.
    pickle.dump(workflow, open(wffile, "w"))

    # Execute the workflow with somaworkflow
    if somaworkflow:
        controller = WorkflowController()
        wfid = controller.submit_workflow(
            workflow=workflow, name="{0}_{1}".format(study, sid))

        # Return the workflow status after execution: poll every
        # 'waittime' seconds until it leaves the running states.
        while True:
            time.sleep(waittime)
            wfstatus = controller.workflow_status(wfid)
            if wfstatus not in [
                    "worklflow_not_started", "workflow_in_progress"]:
                break

    # Execute the workflow with subprocess
    else:
        # -> construct the ordered list of commands to be executed
        workflow_repr = workflow.to_dict()
        graph = Graph()
        for job in workflow_repr["jobs"]:
            graph.add_node(GraphNode(job, None))
        for link in workflow_repr["dependencies"]:
            graph.add_link(link[0], link[1])
        # Topological order guarantees dependencies run before dependents.
        ordered_nodes = [str(node[0]) for node in graph.topological_sort()]
        commands = []
        jobs = workflow_repr["serialized_jobs"]
        temporaries = workflow_repr["serialized_temporary_paths"]
        barriers = workflow_repr["serialized_barriers"]
        for index in ordered_nodes:
            if index in jobs:
                commands.append(jobs[index]["command"])
            elif index in barriers:
                # Barriers carry no command; they only order jobs.
                continue
            else:
                raise Exception("Unexpected node in workflow.")

        # -> Go through all commands
        tmpmap = {}
        for cmd in commands:
            # -> deal with temporary files: non-string command items are
            # soma-workflow temporary-path placeholders that must be
            # resolved to concrete paths in the subject "tmp" directory.
            # (basestring => this code targets Python 2.)
            for index, item in enumerate(cmd):
                if not isinstance(item, basestring):
                    if str(item) not in tmpmap:
                        if str(item) in temporaries:
                            struct = temporaries[str(item)]
                            # Heuristic: recover the output name from the
                            # third command element — assumes a
                            # "...; ... <name>" shape. TODO confirm.
                            name = cmd[2].split(";")[1].split()[-1]
                            tmppath = os.path.join(
                                subjectdir, "t1mri", "default_acquisition",
                                "tmp",
                                str(item) + name + struct["suffix"])
                            tmpmap[str(item)] = tmppath
                        else:
                            raise MorphologistError(
                                "Can't complete command '{0}'.".format(
                                    cmd))
                    cmd[index] = tmpmap[str(item)]

            # -> execute the command
            worker = MorphologistWrapper(cmd)
            worker()
            if worker.exitcode != 0:
                raise MorphologistRuntimeError(
                    " ".join(worker.cmd), worker.stderr)
        wfstatus = "Done"
        wfid = "subprocess"

    return wffile, wfid, wfstatus
def run(self, **Xy):
    '''Run soma-workflow without gui

    Example
    -------

    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2
    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X':X, 'y':y}
    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()
    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3,
    ...                                 remove_finished_wf=False)
    >>> tree_root_node = sfw_engine.run(**Xy)
    light mode
    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1), 'y/test/score_f1': [ 0.6  0.6],
      'y/test/score_recall_mean/pval': [ 0.5],
      'y/test/score_recall/pval': [ 0.   0.5],
      'y/test/score_accuracy/pval': [ 0.],
      'y/test/score_f1/pval': [ 0.   0.5],
      'y/test/score_precision/pval': [ 0.5  0. ],
      'y/test/score_precision': [ 0.6  0.6],
      'y/test/score_recall': [ 0.6  0.6],
      'y/test/score_accuracy': 0.6,
      'y/test/score_recall_mean': 0.6},
     {'key': SelectKBest/SVC(C=3), 'y/test/score_f1': [ 0.6  0.6],
      'y/test/score_recall_mean/pval': [ 0.5],
      'y/test/score_recall/pval': [ 0.   0.5],
      'y/test/score_accuracy/pval': [ 0.],
      'y/test/score_f1/pval': [ 0.   0.5],
      'y/test/score_precision/pval': [ 0.5  0. ],
      'y/test/score_precision': [ 0.6  0.6],
      'y/test/score_recall': [ 0.6  0.6],
      'y/test/score_accuracy': 0.6,
      'y/test/score_recall_mean': 0.6}])
    '''
    # soma-workflow is an optional dependency: fail with an explicit
    # message (on both streams) rather than an anonymous ImportError.
    try:
        from soma_workflow.client import Job, Workflow
        from soma_workflow.client import Helper, FileTransfer
        from soma_workflow.client import WorkflowController
    except ImportError:
        errmsg = "No soma-workflow is found. "\
            "Please verify your soma-worklow"\
            "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    # Work in a fresh temporary directory; the original cwd is restored
    # at the end of the run (process-wide side effect while running).
    tmp_work_dir_path = tempfile.mkdtemp()
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    is_run_local = False
    if not self.resource_id or self.resource_id == "":
        # No resource configured: fall back to running on this machine.
        self.resource_id = socket.gethostname()
        is_run_local = True
    # print "is_run_local=", is_run_local
    if not is_run_local:
        # Remote run: the working directory must be transferred.
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
    else:
        # Local run: jobs can use the path directly.
        ft_working_directory = tmp_work_dir_path

    ## Save the database and tree to working directory
    ## ===============================================
    # np.savez(os.path.join(tmp_work_dir_path,
    # SomaWorkflowEngine.dataset_relative_path), **Xy)
    save_dataset(SomaWorkflowEngine.dataset_relative_path, **Xy)
    store = StoreFs(dirpath=os.path.join(
        tmp_work_dir_path,
        SomaWorkflowEngine.tree_root_relative_path))
    self.tree_root.save_tree(store=store)

    ## Subtree job allocation on disk
    ## ==============================
    node_input = NodesInput(self.tree_root.get_key())
    split_node_input = SplitNodesInput(self.tree_root,
                                       num_processes=self.num_processes)
    # One key file per process; each job works on its own subtree.
    nodesinput_list = split_node_input.split(node_input)
    keysfile_list = save_job_list(tmp_work_dir_path,
                                  nodesinput_list)

    ## Build soma-workflow
    ## ===================
    jobs = self._create_jobs(keysfile_list,
                             is_run_local,
                             ft_working_directory)
    soma_workflow = Workflow(jobs=jobs)
    controller = WorkflowController(self.resource_id,
                                    self.login,
                                    self.pw)
    ## run soma-workflow
    ## =================
    wf_id = controller.submit_workflow(workflow=soma_workflow,
                                       name="epac workflow",
                                       queue=self.queue)
    Helper.transfer_input_files(wf_id, controller)
    # Blocks until the whole workflow has finished.
    Helper.wait_workflow(wf_id, controller)
    Helper.transfer_output_files(wf_id, controller)
    self.engine_info = self.get_engine_info(controller, wf_id)
    if self.remove_finished_wf:
        controller.delete_workflow(wf_id)
    ## read result tree
    ## ================
    self.tree_root = store.load()
    os.chdir(cur_work_dir)
    if os.path.isdir(tmp_work_dir_path) and self.remove_local_tree:
        shutil.rmtree(tmp_work_dir_path)
    return self.tree_root
def run_test(cls, debug=False, interactive=False, **kwargs):
    """Run this test class against every configured computing resource.

    For each resource found in the soma-workflow configuration file,
    open a WorkflowController, run all ``test_*`` methods of the class
    for every allowed (mode, file system) configuration, then tear the
    controller down. In interactive mode the user is asked which
    resources to test; the answers are cached on WorkflowTest for
    subsequent test classes.

    Raises RuntimeError if any test errors or fails.
    kwargs: 'keep_temporary' (bool) keeps the temporary LIGHT_MODE
    database/transfer files for inspection.
    """
    sys.stdout.write("********* soma-workflow tests: %s *********\n"
                     % cls.__name__)

    config_file_path = Configuration.search_config_path()
    resource_ids = Configuration.get_configured_resources(
        config_file_path)
    # Resources selected in a previous interactive run (cached on the
    # shared WorkflowTest base), if any.
    enabled_resources = getattr(WorkflowTest, 'enabled_resources', None)
    enable_resources = []
    if not hasattr(WorkflowTest, 'resource_pass'):
        WorkflowTest.resource_pass = {}

    for resource_id in resource_ids:
        sys.stdout.write("============ Resource : " + resource_id
                         + " =================== \n")
        config = Configuration.load_from_file(resource_id,
                                              config_file_path)
        # Non-interactive runs only exercise LIGHT_MODE resources unless
        # an explicit enabled list was recorded earlier.
        if not interactive \
                and ((enabled_resources is None
                      and config.get_mode() != LIGHT_MODE)
                     or (enabled_resources is not None
                         and resource_id not in enabled_resources)):
            sys.stdout.write('Resource %s is not tested in '
                             'non-interactive mode\n' % resource_id)
            continue
        # skip login/password ask
        if interactive:
            if enabled_resources is None:
                # First interactive pass: ask the user per resource.
                sys.stdout.write("Do you want to test the resource "
                                 "%s (Y/n) ? " % resource_id)
                sys.stdout.flush()
                test_resource = sys.stdin.readline()
                if test_resource.strip() in ['no', 'n', 'N', 'No', 'NO']:
                    # Skip the resource
                    sys.stdout.write('Resource %s is not tested \n'
                                     % resource_id)
                    sys.stdout.flush()
                    continue
                enable_resources.append(resource_id)
                (login, password) = get_user_id(resource_id, config)
                # Cache credentials for later test classes.
                WorkflowTest.resource_pass[resource_id] = (login,
                                                           password)
            else:
                if resource_id not in enabled_resources:
                    continue
                (login, password) = WorkflowTest.resource_pass[
                    resource_id]
        else:
            (login, password) = get_user_id(resource_id, config,
                                            interactive=interactive)

        if config.get_mode() == LIGHT_MODE:
            # use a temporary sqlite database in soma-workflow to avoid
            # concurrent access problems
            tmpdb = tempfile.mkstemp('.db', prefix='swf_')
            os.close(tmpdb[0])
            os.unlink(tmpdb[1])
            # and so on for transfers / stdio files directory
            tmptrans = tempfile.mkdtemp(prefix='swf_')
            config._database_file = tmpdb[1]
            config._transfered_file_dir = tmptrans

        wf_controller = None
        try:
            with suppress_stdout(debug):
                wf_controller = WorkflowController(resource_id, login,
                                                   password,
                                                   config=config)
                cls.setup_wf_controller(wf_controller)
            # Keep only the configurations matching this resource's mode.
            allowed_config = cls.allowed_config[:]
            for configuration in cls.allowed_config:
                if config.get_mode() != configuration[0]:
                    allowed_config.remove(configuration)
            if len(allowed_config) == 0:
                sys.stdout.write(
                    "No tests available for the resource %s \n"
                    % resource_id)

            for configuration in allowed_config:
                (mode, file_system) = configuration
                sys.stdout.write(
                    "\n---------------------------------------\n")
                sys.stdout.write("Mode : " + mode + '\n')
                sys.stdout.write("File system : " + file_system + '\n')
                cls.setup_path_management(file_system)
                # Shared-path cases need a path translation setup; skip
                # them when the configuration does not provide one.
                if file_system in (cls.SHARED_RESOURCE_PATH,
                                   cls.SHARED_TRANSFER) \
                        and not config.get_path_translation():
                    sys.stdout.write(
                        "Paths translation unavailable - not testing "
                        "this case\n")
                    sys.stdout.flush()
                    continue

                # Collect every test_* method of the class.
                suite_list = []
                list_tests = []
                for test in dir(cls):
                    prefix = "test_"
                    if len(test) < len(prefix):
                        continue
                    if test[0:len(prefix)] == prefix:
                        list_tests.append(test)

                suite_list.append(
                    unittest.TestSuite(list(map(cls, list_tests))))
                alltests = unittest.TestSuite(suite_list)
                with suppress_stdout(debug):
                    res = unittest.TextTestRunner(
                        verbosity=2).run(alltests)
                sys.stdout.flush()
                sys.stdout.write("after test\n")
                if len(res.errors) != 0 or len(res.failures) != 0:
                    raise RuntimeError("tests failed.")
        finally:
            # Always stop the engine and clean temporary files, even if
            # a test raised.
            sys.stdout.write("del wf_controller")
            if wf_controller:
                wf_controller.stop_engine()
            del wf_controller
            cls.setup_wf_controller(None)
            # del WorkflowController
            sys.stdout.write("deleted.")
            if config.get_mode() == LIGHT_MODE:
                if not kwargs.get('keep_temporary', False):
                    if os.path.exists(config._database_file):
                        os.unlink(config._database_file)
                    if os.path.exists(config._database_file
                                      + '-journal'):
                        os.unlink(config._database_file + '-journal')
                    shutil.rmtree(config._transfered_file_dir)
                else:
                    print('temporary files kept:')
                    print('databse file:', config._database_file)
                    print('transfers:', config._transfered_file_dir)

    # Remember the interactive selection for the next test class.
    if interactive and enabled_resources is None:
        print('set enabled_resources')
        WorkflowTest.enabled_resources = enable_resources
# Register every job with the workflow: the three job groups plus the
# final merge job.
jobs.extend(group_score + group_significativity + group_merge)
jobs.append(job_final)

# Logical grouping of the jobs as displayed by the soma-workflow GUI.
scores = Group(elements=group_score,
               name="group where test scores are calculated")
significativity = Group(
    elements=group_significativity,
    name="group where distributions are calculated for significance")
merge = Group(elements=group_merge,
              name="group where we merge results")

workflow = Workflow(jobs=jobs,
                    dependencies=dependencies,
                    root_group=[scores, significativity, merge, job_final])

# Keep a serialized copy of the workflow next to the inputs.
Helper.serialize(
    os.path.join(inputs_path, 'optimized_cluster_part_2.somawf'),
    workflow)

### Submit the workflow to computing resource (configured in the client-server mode)
controller = WorkflowController(
    "DSV_cluster_{}".format(login), login, password)

workflow_id = controller.submit_workflow(
    workflow=workflow,
    name="Cluster optimized part 2")

print("Finished !!!")
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote
                           side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of
                                 the function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster: use somaworkflow to run tasks in parallel.
                               The connection setup has to be defined
                               in the configuration section ['parallel-cluster']
                               of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially
                                  as a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2): \
            return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    # Normalise largs/lkwargs so that they are two equal-length lists.
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    # Fill in each call's kwargs with the function's declared defaults.
    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)
    # print 'all_args:', all_args

    fmodule = func.__module__
    # Fully qualified function name, used by the subprocess/remote modes
    # to re-import the function on the other side.
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs)
                 for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        # Each task: pickle its args, run a python subprocess that
        # executes the function, then unpickle the output file.
        results = []
        for i, params in enumerate(all_args):
            # print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            # print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc % (fmodule, fname),
                             params_fn, output_fn])
            # print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            # One soma-workflow job per task; transfers declare which
            # files must be shipped to/from the cluster.
            job = Job(command=['pyhrf_exec_pyfunc', fmodule, fname,
                               param_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        # Tasks are independent: no dependencies between jobs.
        workflow = Workflow(jobs=all_jobs, dependencies=[])

        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            print 'file cPickle loaded:', o
            fout.close()
            os.remove(fnout)
            # A pickled Exception means the remote task failed.
            if isinstance(o, Exception):
                raise RemoteException('Task %d failed' % i, o)
                # NOTE(review): the nested check below is unreachable
                # (placed after the raise) — presumably a leftover errno
                # 17 (EEXIST) tolerance; confirm intent before removing.
                if o.errno != 17:
                    raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
def remote_map_marshal(func, largs=None, lkwargs=None, mode='local'):
    """Variant of remote_map that ships *func* itself by marshalling its
    code to a file, so the function does not need to be importable on the
    executing side.

    Modes: 'local' (plain loop), 'local_with_dumps' (serial
    subprocesses), 'remote_cluster' (soma-workflow). Returns the list of
    per-task results.
    """
    # Normalise largs/lkwargs so that they are two equal-length lists.
    if largs is None:
        if lkwargs is not None:
            largs = [[]] * len(lkwargs)
        else:
            largs = []

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    # Fill in each call's kwargs with the function's declared defaults.
    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    if mode == 'local':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local_with_dumps':
        # Marshal the function once; each task gets its own pickled
        # params file and a subprocess run.
        func_fn = './func.marshal'
        dump_func(func, func_fn)
        results = []
        for i, params in enumerate(all_args):
            print 'params:', params
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            print 'call subprocess ...'
            subprocess.call(['python', '-c', cfunc_marshal, params_fn,
                             func_fn, output_fn])
            print 'Read outputs ...'
            fout = open(output_fn)
            results.append(cPickle.load(fout))
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        #data_dir = './rmap_data'
        data_dir = mkdtemp(prefix="sw_rmap")
        func_fn = op.join(data_dir, 'func.marshal')
        dump_func(func, func_fn)
        # The marshalled function is a shared input of every job.
        func_file = FileTransfer(is_input=True,
                                 client_path=func_fn,
                                 name="func_file")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['python', '-c', cfunc, param_file,
                               func_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[func_file, param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        # Tasks are independent: no dependencies between jobs.
        workflow = Workflow(jobs=all_jobs, dependencies=[])

        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        # controller.transfer_files(fids_to_transfer)
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")
        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        # Collect the per-task outputs in submission order.
        results = []
        for i in xrange(len(all_args)):
            fout = open(op.join(data_dir, 'output_%d.pck' % i))
            results.append(cPickle.load(fout))
            fout.close()
        return results
def run_soma_workflow(treatments, exec_cmd, tmp_local_dirs, server_id,
                      remote_host, remote_user, remote_pathes,
                      local_result_pathes, label_for_cluster,
                      wait_ending=False):
    """Dispatch treatments using soma-workflow.

    Parameters
    ----------
    treatments
        it is a dict mapping a treatment name to a treatment object
    exec_cmd
        it is the command to run on each ROI data.
    tmp_local_dirs
        it is a dict mapping a treatment name to a local tmp dir (used to
        store a temporary configuration file)
    server_id
        it is the server ID as expected by WorkflowController
    remote_host
        it is the remote machine where treatments are treated in parallel
    remote_user
        it is used to log in remote_host
    remote_pathes
        it is a dict mapping a treatment name to an existing remote dir
        which will be used to store ROI data and result files
    local_result_pathes
        it is a dict mapping a treatment name to a local path where final
        results will be sorted (host will send it there by scp)
    label_for_cluster
        it is the base name used to label workflows and sub jobs
    """
    import getpass
    from socket import gethostname

    local_user = getpass.getuser()
    local_host = gethostname()

    # Aggregate jobs/dependencies/groups over all treatments.
    all_nodes = []
    all_deps = []
    all_groups = []
    split_jobs = []
    for t_id, treatment in treatments.iteritems():

        tmp_local_dir = tmp_local_dirs[t_id]
        remote_path = remote_pathes[t_id]
        local_result_path = local_result_pathes[t_id]

        # sj: the data-splitting job, n/d/g: nodes, deps and group for
        # this treatment.
        sj, n, d, g = prepare_treatment_jobs(
            treatment, tmp_local_dir, local_result_path, local_user,
            local_host, remote_host, remote_user, remote_path,
            label_for_cluster + '-' + str(t_id))
        all_nodes.extend(n)
        all_deps.extend(d)
        all_groups.append(g)
        split_jobs.append(sj)

    # Jobs for data splitting should be done sequentially.
    # If they're done in parallel, they may flood the remote file system
    for isj in xrange(len(split_jobs)):
        if isj + 1 < len(split_jobs):
            all_deps.append((split_jobs[isj], split_jobs[isj + 1]))

    # # Be sure that all splitting jobs are done first:
    # # Is there a better way ?
    # for n in all_nodes:
    #     for sjob in split_jobs:
    #         all_deps.append((sjob,n))
    # Does not seem to work well -> maybe to many deps ?

    workflow = Workflow(all_nodes + split_jobs, all_deps,
                        root_group=all_groups)

    # f = open('/tmp/workflow.pck','w')
    # cPickle.dump(workflow, f)
    # f.close()

    logger.info('Open connection ...')
    connection = WorkflowController(server_id, remote_user)

    logger.info('Submit workflow ...')
    wf_id = connection.submit_workflow(
        workflow=workflow,
        # expiration_date="",
        # queue="run32",
        name=label_for_cluster + '-' + local_user)
    #wf = connection.workflow(wf_id)

    if wait_ending:  # wait for result
        logger.info('Wait for workflow to end and make outputs ...')
        Helper.wait_workflow(wf_id, connection)

        # Check that every treatment produced its local result file.
        for t_id, local_result_path in local_result_pathes.iteritems():
            treatment = treatments[t_id]
            rfilename = treatment.result_dump_file
            if rfilename is None:
                rfilename = 'result.pck'
            local_result_file = op.join(local_result_path,
                                        op.basename(rfilename))

            if not op.exists(local_result_file):
                raise Exception('Local result does not exist "%s"'
                                % local_result_file)

        # NOTE(review): relies on the loop variables above keeping their
        # last-iteration values — i.e. only the last treatment's result
        # is loaded and returned.
        if treatment.analyser.outFile is not None:
            # return result only for last treatment ...
            print 'Load result from %s ...' % local_result_file
            if splitext(local_result_file)[1] == '.gz':
                import gzip
                fresult = gzip.open(local_result_file)
            else:
                fresult = open(local_result_file)
            results = cPickle.load(fresult)
            fresult.close()
            # print 'Make outputs ...'
            #treatment.output(results, dump=False)
            logger.info('Cleaning tmp dirs ...')
            for tmp_dir in tmp_local_dirs.itervalues():
                shutil.rmtree(tmp_dir)

            return results
    else:
        logger.info('Cleaning tmp dirs ...')
        for tmp_dir in tmp_local_dirs.itervalues():
            shutil.rmtree(tmp_dir)

        logger.info('Workflow sent, returning ...')
        return []
# Assemble a two-step estimator: scaling followed by logistic regression.
p = [("scaler", s1), ("logit", s2)]
est = Pipeline(p)

# Load the iris dataset.
X, y = iris.get_data()

# Serialize the estimator and a stratified 5-fold CV scheme to JSON.
method_conf = JSONify_estimator(est, out="./est.json")
cv_conf = JSONify_cv(StratifiedKFold,
                     cv_kwargs={"n_folds": 5},
                     score_func=f1_score,
                     stratified=True,
                     out="./cv.json")

# Write the (compressed) dataset file used by the jobs.
dataset = build_dataset(X, y, method_conf, cv_conf, ".", compress=1)

# Internal workflow representation, then the soma-workflow version of it.
wfi = create_wf(dataset['folds'], cv_conf, method_conf, ".", verbose=True)
wf = save_wf(wfi, "./workflow.json", mode="soma-workflow")

# Submit to soma-workflow.
controler = WorkflowController()
wf_id = controler.submit_workflow(workflow=wf, name="first example")

# Poll every 2 seconds until the workflow completes.
while True:
    if controler.workflow_status(wf_id) == 'workflow_done':
        break
    time.sleep(2)

# Display the aggregated result produced by the workflow.
print(joblib.load('./final_res.pkl'))
from __future__ import print_function import time import os from soma_workflow.client import Job, Workflow, WorkflowController, Helper, FileTransfer from soma_workflow.configuration import Configuration # from soma_workflow.connection import RemoteConnection user = '******' try: import pwd user = pwd.getpwuid(os.getuid()).pw_name except Exception: pass controller = WorkflowController("Gabriel", user) # FileTransfer creation for input files file1 = FileTransfer(is_input=True, client_path="%s/create_file.py" % Configuration.get_home_dir(), name="script") file2 = FileTransfer(is_input=True, client_path="%s/output_file" % Configuration.get_home_dir(), name="file created on the server") # Job and Workflow run_script = Job(command=["python", file1, file2], name="copy",
echo %s """ % test_bash_script fileout.write(filecontent) fileout.close() os.chdir(cur_work_dir) job1 = Job(command=[u"touch", test_filepath], name="epac_job_test", working_directory=tmp_work_dir_path) job2 = Job(command=["%s/readfile" % cur_file_dir, test_bash_script], name="epac_job_test", working_directory=tmp_work_dir_path) soma_workflow = Workflow(jobs=[job1, job2]) resource_id = socket.gethostname() controller = WorkflowController(resource_id, "", "") ## run soma-workflow ## ================= wf_id = controller.submit_workflow(workflow=soma_workflow, name="epac workflow") Helper.wait_workflow(wf_id, controller) nb_failed_jobs = len(Helper.list_failed_jobs(wf_id, controller)) if nb_failed_jobs > 0: raise ValueError("Soma-workflow error, cannot use working directory") if not os.path.isfile(os.path.join(tmp_work_dir_path, test_filepath)): raise ValueError("Soma-workflow cannot define working directory") else: print("OK for creating new file in working directory")
"--output_r2", os.path.join(args.output, 'r2'), "--output_distribution", os.path.join(args.output, 'distribution'), "--x", args.x, "--y", args.y, "--shuffling", shuffling, "--n_permutations", args.nb_permutations, "--alpha_percentile", args.alpha_percentile ], name="job {} - alpha {}".format(run, alpha), working_directory=scripts_path) group_significativity.append(job) jobs.append(job) distribution_voxels = Group(elements=group_significativity, name="Voxel wise fitting of the models") workflow2 = Workflow(jobs=jobs, root_group=[distribution_voxels]) ### Submit the workflow to computing resource (configured in the client-server mode) controller2 = WorkflowController( "DSV_cluster_ap259944", args.login, args.password) #"DSV_cluster_ap259944", args.login, args.password workflow_id2 = controller2.submit_workflow(workflow=workflow2, name="Voxel-wise computations") # You may use the gui or manually transfer the files: manual = True if manual: Helper.transfer_input_files(workflow_id2, controller2) Helper.wait_workflow(workflow_id2, controller2) Helper.transfer_output_files(workflow_id2, controller2)