Example #1
0
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster": use somaworkflow to run tasks in parallel.
                                The connection setup has to be defined
                                in the configuration section ['parallel-cluster']
                                of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2): \
        return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    # Normalize missing argument lists so that largs and lkwargs always
    # end up with the same length (one entry per task).
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    # Complete each kwargs dict with the function's default values.
    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    fmodule = func.__module__
    # Fully qualified name, used to re-import the function on the remote side.
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        # Configured process count takes precedence; fall back to CPU count.
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs)
                 for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # Dump the arguments of task i so the subprocess can load them.
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            subprocess.call(['python', '-c', cfunc % (fmodule, fname),
                             params_fn, output_fn])
            fout = open(output_fn)
            try:
                results.append(cPickle.load(fout))
            finally:
                # Fix: the handle was previously never closed (leak).
                fout.close()
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            # One pickled parameter file and one expected output file per task.
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=['pyhrf_exec_pyfunc', fmodule, fname,
                               param_file, output_file],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        # Jobs are independent -> no dependencies in the workflow.
        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        wf_id = controller.submit_workflow(
            workflow=workflow, name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            # Fix: was a leftover debug print; use the module logger instead.
            logger.debug('file cPickle loaded: %s', str(o))
            fout.close()
            os.remove(fnout)
            # A failed remote task stores its exception object as the result.
            # (Removed dead code: an errno != 17 re-check placed after the
            # unconditional raise was unreachable.)
            if isinstance(o, Exception):
                raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results
Example #2
0
    def run(self, parallel=None, n_jobs=None):
        """
        Run the analysis: load data, run estimation, output results
        """
        if parallel is None:
            result = self.execute()
        elif parallel == 'local':
            cfg_parallel = pyhrf.cfg['parallel-local']
            try:
                from joblib import Parallel, delayed
            except ImportError:
                raise Exception(
                    'Can not import joblib. It is required to '
                    'enable parallel processing on a local machine.')

            effective_level = logger.getEffectiveLevel()
            if effective_level == logging.DEBUG:
                parallel_verb = 11
            elif effective_level == logging.INFO:
                parallel_verb = 2
            else:
                parallel_verb = 0

            if n_jobs is None:
                if cfg_parallel["nb_procs"]:
                    n_jobs = cfg_parallel["nb_procs"]
                else:
                    n_jobs = available_cpu_count()

            p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
            result = p(delayed(exec_t)(t) for t in self.split(output_dir=None))
            # join list of lists:
            result = list(itertools.chain.from_iterable(result))

        elif parallel == 'LAN':

            from pyhrf import grid
            cfg_parallel = pyhrf.cfg['parallel-LAN']
            remoteUser = cfg_parallel['user']

            # 1. Some checks on input/output directory
            remoteDir = cfg_parallel['remote_path']
            # At the end, results will be retrieved direclty from remoteDir,
            # which has to be readable
            if remoteDir is None or not op.exists(remoteDir):
                raise Exception('Remote directory is not readable (%s).'
                                'Consider mounting it with sshfs.' % remoteDir)

            # Try if remoteDir is writeable, so that we don't need to upload
            # data via ssh
            remote_writeable = False
            if os.access(remoteDir, os.W_OK):
                remote_writeable = True
                tmpDir = remoteDir
            else:
                logger.info('Remote dir is not writeable -> using tmp '
                            'dir to store splitted data & then upload.')

            # 2. split roi data
            logger.info('Path to store sub treatments: %s', tmpDir)
            treatments_dump_files = []
            self.split(dump_sub_results=True,
                       output_dir=tmpDir,
                       make_sub_outputs=False,
                       output_file_list=treatments_dump_files)

            # 3. copy data to remote directory
            if not remote_writeable:
                host = cfg_parallel['remote_host']
                logger.info('Uploading data to %s ...', remoteDir)
                remote_input_files = remote_copy(treatments_dump_files, host,
                                                 remoteUser, remoteDir)

            # 4. create job list
            tasks_list = []
            for f in treatments_dump_files:
                f = op.join(remoteDir, op.basename(f))
                nice = cfg_parallel['niceness']
                tasks_list.append(
                    'nice -n %d %s -v%d -t "%s"' %
                    (nice, 'pyhrf_jde_estim', logger.getEffectiveLevel(), f))
            mode = 'dispatch'
            tasks = grid.read_tasks(';'.join(tasks_list), mode)
            timeslot = grid.read_timeslot('allday')
            hosts = grid.read_hosts(cfg_parallel['hosts'])

            if self.output_dir is not None:
                brokenfile = op.join(self.output_dir, 'pyhrf-broken_cmd.batch')
                logfile = op.join(self.output_dir, 'pyhrf-parallel.log')
                logger.info('Log file for process dispatching: %s', logfile)
            else:
                brokenfile = None
                logfile = None

            # 3. launch them
            logger.info('Dispatching processes ...')
            try:
                grid.run_grid(mode,
                              hosts,
                              'rsa',
                              tasks,
                              timeslot,
                              brokenfile,
                              logfile,
                              user=remoteUser)
                grid.kill_threads()
            except KeyboardInterrupt:
                grid.quit(None, None)

            if brokenfile is not None and len(
                    open(brokenfile).readlines()) > 0:
                logger.info('There are some broken commands, trying again ...')
                try:
                    tasks = grid.read_tasks(brokenfile, mode)
                    grid.run_grid(mode,
                                  hosts,
                                  'rsa',
                                  tasks,
                                  timeslot,
                                  brokenfile,
                                  logfile,
                                  user=remoteUser)
                    grid.kill_threads()
                except KeyboardInterrupt:
                    grid.quit(None, None)

            # 3.1 grab everything back ??
            # try:
            # "scp %s@%s:%s %s" %(remoteUser,host,
            #                     op.join(remoteDir,'result*'),
            #                     op.abspath(op.dirname(options.cfgFile))))
            # TODO : test if everything went fine

            # 4. merge all results and create outputs
            result = []
            # if op.exists(remoteDir): TODO :scp if remoteDir not readable
            nb_treatments = len(treatments_dump_files)
            remote_result_files = [
                op.join(remoteDir, 'result_%04d.pck' % i)
                for i in range(nb_treatments)
            ]
            logger.info('remote_result_files: %s', str(remote_result_files))
            nres = len(filter(op.exists, remote_result_files))
            if nres == nb_treatments:
                logger.info('Grabbing results ...')
                for fnresult in remote_result_files:
                    fresult = open(fnresult)
                    result.append(cPickle.load(fresult)[0])
                    fresult.close()
            else:
                print 'Found only %d result files (expected %d)' \
                    % (nres, nb_treatments)
                print 'Something went wrong, check the log files'
            if not remote_writeable:
                logger.info('Cleaning tmp dir (%s)...', tmpDir)
                shutil.rmtree(tmpDir)
                logger.info('Cleaning up remote dir (%s) through ssh ...',
                            remoteDir)
                cmd = 'ssh %s@%s rm -f "%s" "%s" ' \
                    % (remoteUser, host, ' '.join(remote_result_files),
                       ' '.join(remote_input_files))
                logger.info(cmd)
                os.system(cmd)
            else:
                if 0:
                    logger.info('Cleaning up remote dir (%s)...', remoteDir)
                    for f in os.listdir(remoteDir):
                        os.remove(op.join(remoteDir, f))

        elif parallel == 'cluster':

            from pyhrf.parallel import run_soma_workflow
            cfg = pyhrf.cfg['parallel-cluster']
            # create tmp remote path:
            date_now = time.strftime('%c').replace(' ', '_').replace(':', '_')
            remote_path = op.join(cfg['remote_path'], date_now)
            logger.info('Create tmp remote dir: %s', remote_path)
            remote_mkdir(cfg['server'], cfg['user'], remote_path)
            t_name = 'default_treatment'
            tmp_dir = pyhrf.get_tmp_path()
            label_for_cluster = self.analyser.get_label()
            if self.output_dir is None:
                out_dir = pyhrf.get_tmp_path()
            else:
                out_dir = self.output_dir
            result = run_soma_workflow({t_name: self},
                                       'pyhrf_jde_estim', {t_name: tmp_dir},
                                       cfg['server_id'],
                                       cfg['server'],
                                       cfg['user'], {t_name: remote_path},
                                       {t_name: op.abspath(out_dir)},
                                       label_for_cluster,
                                       wait_ending=True)

        else:
            raise Exception('Parallel mode "%s" not available' % parallel)

        logger.info('Retrieved %d results', len(result))
        return self.output(result, (self.result_dump_file is not None),
                           self.make_outputs)
Example #3
0
    def run(self, parallel=None, n_jobs=None):
        """
        Run the analysis: load data, run estimation, output results
        """
        if parallel is None:
            result = self.execute()
        elif parallel == 'local':
            cfg_parallel = pyhrf.cfg['parallel-local']
            try:
                from joblib import Parallel, delayed
            except ImportError:
                raise Exception('Can not import joblib. It is required to '
                                'enable parallel processing on a local machine.')

            effective_level = logger.getEffectiveLevel()
            if effective_level == logging.DEBUG:
                parallel_verb = 11
            elif effective_level == logging.INFO:
                parallel_verb = 2
            else:
                parallel_verb = 0

            if n_jobs is None:
                if cfg_parallel["nb_procs"]:
                    n_jobs = cfg_parallel["nb_procs"]
                else:
                    n_jobs = available_cpu_count()

            p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
            result = p(delayed(exec_t)(t) for t in self.split(output_dir=None))
            # join list of lists:
            result = list(itertools.chain.from_iterable(result))

        elif parallel == 'LAN':

            from pyhrf import grid
            cfg_parallel = pyhrf.cfg['parallel-LAN']
            remoteUser = cfg_parallel['user']

            # 1. Some checks on input/output directory
            remoteDir = cfg_parallel['remote_path']
            # At the end, results will be retrieved direclty from remoteDir,
            # which has to be readable
            if remoteDir is None or not op.exists(remoteDir):
                raise Exception('Remote directory is not readable (%s).'
                                'Consider mounting it with sshfs.'
                                % remoteDir)

            # Try if remoteDir is writeable, so that we don't need to upload
            # data via ssh
            remote_writeable = False
            if os.access(remoteDir, os.W_OK):
                remote_writeable = True
                tmpDir = remoteDir
            else:
                logger.info('Remote dir is not writeable -> using tmp '
                            'dir to store splitted data & then upload.')

            # 2. split roi data
            logger.info('Path to store sub treatments: %s', tmpDir)
            treatments_dump_files = []
            self.split(dump_sub_results=True, output_dir=tmpDir,
                       make_sub_outputs=False,
                       output_file_list=treatments_dump_files)

            # 3. copy data to remote directory
            if not remote_writeable:
                host = cfg_parallel['remote_host']
                logger.info('Uploading data to %s ...', remoteDir)
                remote_input_files = remote_copy(treatments_dump_files,
                                                 host, remoteUser, remoteDir)

            # 4. create job list
            tasks_list = []
            for f in treatments_dump_files:
                f = op.join(remoteDir, op.basename(f))
                nice = cfg_parallel['niceness']
                tasks_list.append('nice -n %d %s -v%d -t "%s"'
                                  % (nice, 'pyhrf_jde_estim',
                                     logger.getEffectiveLevel(), f))
            mode = 'dispatch'
            tasks = grid.read_tasks(';'.join(tasks_list), mode)
            timeslot = grid.read_timeslot('allday')
            hosts = grid.read_hosts(cfg_parallel['hosts'])

            if self.output_dir is not None:
                brokenfile = op.join(self.output_dir, 'pyhrf-broken_cmd.batch')
                logfile = op.join(self.output_dir, 'pyhrf-parallel.log')
                logger.info('Log file for process dispatching: %s', logfile)
            else:
                brokenfile = None
                logfile = None

            # 3. launch them
            logger.info('Dispatching processes ...')
            try:
                grid.run_grid(mode, hosts, 'rsa', tasks, timeslot, brokenfile,
                              logfile, user=remoteUser)
                grid.kill_threads()
            except KeyboardInterrupt:
                grid.quit(None, None)

            if brokenfile is not None and len(open(brokenfile).readlines()) > 0:
                logger.info('There are some broken commands, trying again ...')
                try:
                    tasks = grid.read_tasks(brokenfile, mode)
                    grid.run_grid(mode, hosts, 'rsa', tasks, timeslot, brokenfile,
                                  logfile, user=remoteUser)
                    grid.kill_threads()
                except KeyboardInterrupt:
                    grid.quit(None, None)

            # 3.1 grab everything back ??
            # try:
                # "scp %s@%s:%s %s" %(remoteUser,host,
                #                     op.join(remoteDir,'result*'),
                #                     op.abspath(op.dirname(options.cfgFile))))
            # TODO : test if everything went fine

            # 4. merge all results and create outputs
            result = []
            # if op.exists(remoteDir): TODO :scp if remoteDir not readable
            nb_treatments = len(treatments_dump_files)
            remote_result_files = [op.join(remoteDir, 'result_%04d.pck' % i)
                                   for i in range(nb_treatments)]
            logger.info('remote_result_files: %s', str(remote_result_files))
            nres = len(filter(op.exists, remote_result_files))
            if nres == nb_treatments:
                logger.info('Grabbing results ...')
                for fnresult in remote_result_files:
                    fresult = open(fnresult)
                    result.append(cPickle.load(fresult)[0])
                    fresult.close()
            else:
                print 'Found only %d result files (expected %d)' \
                    % (nres, nb_treatments)
                print 'Something went wrong, check the log files'
            if not remote_writeable:
                logger.info('Cleaning tmp dir (%s)...', tmpDir)
                shutil.rmtree(tmpDir)
                logger.info('Cleaning up remote dir (%s) through ssh ...',
                            remoteDir)
                cmd = 'ssh %s@%s rm -f "%s" "%s" ' \
                    % (remoteUser, host, ' '.join(remote_result_files),
                       ' '.join(remote_input_files))
                logger.info(cmd)
                os.system(cmd)
            else:
                if 0:
                    logger.info('Cleaning up remote dir (%s)...', remoteDir)
                    for f in os.listdir(remoteDir):
                        os.remove(op.join(remoteDir, f))

        elif parallel == 'cluster':

            from pyhrf.parallel import run_soma_workflow
            cfg = pyhrf.cfg['parallel-cluster']
            # create tmp remote path:
            date_now = time.strftime('%c').replace(' ', '_').replace(':', '_')
            remote_path = op.join(cfg['remote_path'], date_now)
            logger.info('Create tmp remote dir: %s', remote_path)
            remote_mkdir(cfg['server'], cfg['user'], remote_path)
            t_name = 'default_treatment'
            tmp_dir = pyhrf.get_tmp_path()
            label_for_cluster = self.analyser.get_label()
            if self.output_dir is None:
                out_dir = pyhrf.get_tmp_path()
            else:
                out_dir = self.output_dir
            result = run_soma_workflow({t_name: self}, 'pyhrf_jde_estim',
                                       {t_name: tmp_dir}, cfg['server_id'],
                                       cfg['server'], cfg['user'],
                                       {t_name: remote_path},
                                       {t_name: op.abspath(out_dir)},
                                       label_for_cluster, wait_ending=True)

        else:
            raise Exception('Parallel mode "%s" not available' % parallel)

        logger.info('Retrieved %d results', len(result))
        return self.output(result, (self.result_dump_file is not None),
                           self.make_outputs)
Example #4
0
def remote_map(func, largs=None, lkwargs=None, mode='serial'):
    """
    Execute a function in parallel on a list of arguments.

    Args:
        *func* (function): function to apply on each item.
                           **this function must be importable on the remote side**
        *largs* (list of tuple): each item in the list is a tuple
                                 containing all positional argument values of the
                                 function
        *lkwargs* (list of dict): each item in the list is a dict
                                  containing all named arguments of the
                                  function mapped to their value.

        *mode* (str): indicates how execution is distributed. Choices are:

            - "serial": single-thread loop on the local machine
            - "local" : use joblib to run tasks in parallel.
                        The number of simultaneous jobs is defined in
                        the configuration section ['parallel-local']['nb_procs']
                        see ~/.pyhrf/config.cfg
            - "remote_cluster": use somaworkflow to run tasks in parallel.
                                The connection setup has to be defined
                                in the configuration section ['parallel-cluster']
                                of ~/.pyhrf/config.cfg.
            - "local_with_dumps": testing purpose only, run each task serially as
                                  a subprocess.

    Returns:
         a list of results

    Raises:
         RemoteException if any remote task has failed

    Example:
    >>> from pyhrf.parallel import remote_map
    >>> def foo(a, b=2): \
        return a + b
    >>> remote_map(foo, [(2,),(3,)], [{'b':5}, {'b':7}])
    [7, 10]
    """
    # Normalize missing argument lists so that largs and lkwargs always
    # end up with the same length (one entry per task).
    if largs is None:
        if lkwargs is not None:
            largs = [tuple()] * len(lkwargs)
        else:
            largs = [tuple()]

    if lkwargs is None:
        lkwargs = [{}] * len(largs)

    # Complete each kwargs dict with the function's default values.
    lkwargs = [merge_default_kwargs(func, kw) for kw in lkwargs]

    assert len(lkwargs) == len(largs)

    all_args = zip(largs, lkwargs)

    fmodule = func.__module__
    # Fully qualified name, used to re-import the function on the remote side.
    fname = '.'.join([fmodule, func.__name__])

    if mode == 'serial':
        return [func(*args, **kwargs) for args, kwargs in all_args]
    elif mode == 'local':
        try:
            from joblib import Parallel, delayed
        except ImportError:
            raise ImportError('Can not import joblib. It is '
                              'required to enable parallel '
                              'processing on a local machine.')

        if logger.getEffectiveLevel() == logging.DEBUG:
            parallel_verb = 10
        else:
            parallel_verb = 0
        # Configured process count takes precedence; fall back to CPU count.
        if pyhrf.cfg['parallel-local']['nb_procs']:
            n_jobs = pyhrf.cfg['parallel-local']['nb_procs']
        else:
            n_jobs = available_cpu_count()
        p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
        return p(delayed(func)(*args, **kwargs) for args, kwargs in all_args)

    elif mode == 'local_with_dumps':
        results = []
        for i, params in enumerate(all_args):
            # Dump the arguments of task i so the subprocess can load them.
            params_fn = 'params_%d.pck' % i
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            output_fn = 'output_%d.pck' % i
            subprocess.call([
                'python', '-c', cfunc % (fmodule, fname), params_fn, output_fn
            ])
            fout = open(output_fn)
            try:
                results.append(cPickle.load(fout))
            finally:
                # Fix: the handle was previously never closed (leak).
                fout.close()
        return results
    elif mode == 'remote_cluster':
        # FileTransfer creation for input files
        data_dir = mkdtemp(prefix="sw_rmap")

        all_jobs = []
        param_files = []
        for i, params in enumerate(all_args):
            # One pickled parameter file and one expected output file per task.
            params_fn = op.join(data_dir, 'params_%d.pck' % i)
            fparams = open(params_fn, 'wb')
            cPickle.dump(params, fparams)
            fparams.close()
            param_file = FileTransfer(is_input=True,
                                      client_path=params_fn,
                                      name='params_file_%d' % i)
            param_files.append(param_file)
            output_fn = op.join(data_dir, 'output_%d.pck' % i)
            output_file = FileTransfer(is_input=False,
                                       client_path=output_fn,
                                       name='output_file_%d' % i)
            job = Job(command=[
                'pyhrf_exec_pyfunc', fmodule, fname, param_file, output_file
            ],
                      name="rmap, item %d" % i,
                      referenced_input_files=[param_file],
                      referenced_output_files=[output_file])
            all_jobs.append(job)

        # Jobs are independent -> no dependencies in the workflow.
        workflow = Workflow(jobs=all_jobs, dependencies=[])
        # submit the workflow
        cfg = pyhrf.cfg['parallel-cluster']
        controller = WorkflowController(cfg['server_id'], cfg['user'])
        wf_id = controller.submit_workflow(workflow=workflow,
                                           name="remote_map")

        Helper.transfer_input_files(wf_id, controller)

        Helper.wait_workflow(wf_id, controller)

        Helper.transfer_output_files(wf_id, controller)

        results = []
        for i in xrange(len(all_args)):
            fnout = op.join(data_dir, 'output_%d.pck' % i)
            fout = open(fnout)
            o = cPickle.load(fout)
            # Fix: was a leftover debug print; use the module logger instead.
            logger.debug('file cPickle loaded: %s', str(o))
            fout.close()
            os.remove(fnout)
            # A failed remote task stores its exception object as the result.
            # (Removed dead code: an errno != 17 re-check placed after the
            # unconditional raise was unreachable.)
            if isinstance(o, Exception):
                raise RemoteException('Task %d failed' % i, o)
            results.append(o)
        return results