Example #1
def run_function(function, folder_dataset, list_subj, list_args=[], nb_cpu=None, verbose=1, test_integrity=0):
    """
    Run a test function on the dataset using multiprocessing and save the results
    :return: results
    # results are organized as the following: tuple of (status, output, DataFrame with results)
    """

    # add full path to each subject
    list_subj_path = [os.path.join(folder_dataset, subject) for subject in list_subj]

    # Scripts that use ITK multithreading must disable it when running under multiprocessing
    os.environ["ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS"] = "1"

    # build the list of all combinations of function + subject path + arguments. Example of one list element:
    # ('sct_propseg', os.path.join(path_sct, 'data', 'sct_test_function', '200_005_s2'), '-i ' + os.path.join("t2", "t2.nii.gz") + ' -c t2', 1)
    list_func_subj_args = list(itertools.product(*[[function], list_subj_path, list_args, [test_integrity]]))
    # data_and_params = itertools.izip(itertools.repeat(function), data_subjects, itertools.repeat(parameters))

    logger.debug("stating pool with {} thread(s)".format(nb_cpu))
    pool = PoolExecutor(nb_cpu)
    compute_time = None
    results_dataframe = None
    try:
        compute_time = time.time()
        count = 0
        all_results = []

        # logger.info('Waiting for results, be patient')
        future_dirs = {pool.submit(function_launcher, subject_arg): subject_arg
                         for subject_arg in list_func_subj_args}

        for future in concurrent.futures.as_completed(future_dirs):
            count += 1
            subject = os.path.basename(future_dirs[future][1])
            arguments = future_dirs[future][2]
            try:
                result = future.result()
                sct.no_new_line_log('Processing subjects... {}/{}'.format(count, len(list_func_subj_args)))
                all_results.append(result)
            except Exception as exc:
                logger.error('{} {} generated an exception: {}'.format(subject, arguments, exc))

        compute_time = time.time() - compute_time

        # concatenate all_results into a single pandas DataFrame
        results_dataframe = pd.concat(all_results)

    except KeyboardInterrupt:
        logger.warning("\nCaught KeyboardInterrupt, terminating workers")
        for job in future_dirs:
            job.cancel()
    except Exception as e:
        logger.error('Error on line {}'.format(sys.exc_info()[-1].tb_lineno))
        logger.exception(e)
        for job in future_dirs:
            job.cancel()
        raise
    finally:
        pool.shutdown()

    return {'results': results_dataframe, "compute_time": compute_time}
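
PoolExecutor and function_launcher are not defined in this snippet. Below is a minimal sketch of what they might look like, assuming PoolExecutor wraps concurrent.futures.ProcessPoolExecutor and function_launcher unpacks one (function, subject path, arguments, test_integrity) tuple built above; the placeholder result columns are illustrative only.

import concurrent.futures

import pandas as pd


def PoolExecutor(nb_cpu):
    # max_workers=None lets ProcessPoolExecutor pick a worker count from the CPU count.
    return concurrent.futures.ProcessPoolExecutor(max_workers=nb_cpu)


def function_launcher(func_subj_args):
    # Unpack one (function, subject_path, arguments, test_integrity) tuple
    # produced by itertools.product() in run_function().
    function, subject_path, arguments, test_integrity = func_subj_args
    # Placeholder: the real launcher would run the named SCT script on the
    # subject and return a one-row DataFrame that pd.concat() can stack.
    return pd.DataFrame([{'function': function,
                          'subject': subject_path,
                          'arguments': arguments,
                          'status': 0}])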
Example #2
def main():

    executor = MPIPoolExecutor()
    futures = []
    for id in range(1, size):
        futures.append(executor.submit(report, id))
    report(0)
    try:
        results = [future.result() for future in futures]
        num_returned = len(set(results))
        print('%i workers completed %i tasks' % (num_workers, num_returned))
        sys.stdout.flush()
        time.sleep(0.1)
    except Exception:
        traceback.print_exc(file=sys.stdout)
        sys.stdout.flush()
        time.sleep(0.1)
        executor.shutdown(wait=False)
        os._exit(1)
    executor.shutdown()
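
The globals size and num_workers and the report callable are defined elsewhere in the original script. A plausible sketch of those pieces, plus the usual way such a script is launched, follows; the definitions are assumptions, not the original code.

import os
from mpi4py import MPI

size = MPI.COMM_WORLD.Get_size()   # total number of MPI processes (assumed)
num_workers = size - 1             # every rank except the master (assumed)


def report(task_id):
    # Hypothetical task body: identify which worker process ran the task, so
    # len(set(results)) in main() counts how many distinct workers responded.
    return os.getpid()

# Typical launch, letting mpi4py.futures hand ranks 1..size-1 to the pool:
#   mpiexec -n <size> python -m mpi4py.futures this_script.py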
Example #3
class MyMPIPool(object):
    def __init__(self, **kwargs):
        from mpi4py.futures import MPIPoolExecutor
        self.real = MPIPoolExecutor(**kwargs)

    def map(self, func, args, chunksize=1):
        return list(self.real.map(func, args, chunksize=chunksize))

    def imap_unordered(self, func, args, chunksize=1):
        return result_iter([self.real.submit(func, a) for a in args])

    def bootup(self, **kwargs):
        return self.real.bootup(**kwargs)

    def shutdown(self, **kwargs):
        return self.real.shutdown(**kwargs)

    def close(self):
        self.shutdown()

    def join(self):
        pass

    def apply_async(self, *args, **kwargs):
        raise RuntimeError('APPLY_ASYNC NOT IMPLEMENTED IN MyMPIPool')

    def get_worker_cpu(self):
        return 0.

    def get_worker_wall(self):
        return 0.

    def get_pickle_traffic(self):
        return None

    def get_pickle_traffic_string(self):
        return 'nope'
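
result_iter is not shown in this snippet. One way to implement it, assuming imap_unordered should yield results as the submitted futures finish (mirroring multiprocessing.Pool.imap_unordered); the polling loop avoids relying on the internals of the pool's Future type.

import time


def result_iter(futures):
    # Yield each result as soon as its future completes, regardless of the
    # order in which the jobs were submitted.
    pending = list(futures)
    while pending:
        done = [future for future in pending if future.done()]
        for future in done:
            pending.remove(future)
            yield future.result()
        if not done:
            time.sleep(0.01)   # avoid a tight busy-wait between polls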
Example #4
def run_function(function,
                 folder_dataset,
                 list_subj,
                 list_args=[],
                 nb_cpu=None,
                 verbose=1,
                 test_integrity=0):
    """
    Run a test function on the dataset using multiprocessing and save the results
    :return: results
    # results are organized as the following: tuple of (status, output, DataFrame with results)
    """

    # add full path to each subject
    list_subj_path = [
        os.path.join(folder_dataset, subject) for subject in list_subj
    ]

    # Scripts that use ITK multithreading must disable it when running under multiprocessing
    os.environ["ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS"] = "1"

    # build the list of all combinations of function + subject path + arguments. Example of one list element:
    # ('sct_propseg', os.path.join(path_sct, 'data', 'sct_test_function', '200_005_s2'), '-i ' + os.path.join("t2", "t2.nii.gz") + ' -c t2', 1)
    list_func_subj_args = list(
        itertools.product(
            *[[function], list_subj_path, list_args, [test_integrity]]))
    # data_and_params = itertools.izip(itertools.repeat(function), data_subjects, itertools.repeat(parameters))

    sct.log.debug("stating pool with {} thread(s)".format(nb_cpu))
    pool = PoolExecutor(nb_cpu)
    compute_time = None
    results_dataframe = None
    try:
        compute_time = time.time()
        count = 0
        all_results = []

        # sct.log.info('Waiting for results, be patient')
        future_dirs = {
            pool.submit(function_launcher, subject_arg): subject_arg
            for subject_arg in list_func_subj_args
        }

        for future in concurrent.futures.as_completed(future_dirs):
            count += 1
            subject = os.path.basename(future_dirs[future][1])
            arguments = future_dirs[future][2]
            try:
                result = future.result()
                sct.no_new_line_log('Processing subjects... {}/{}'.format(
                    count, len(list_func_subj_args)))
                all_results.append(result)
            except Exception as exc:
                sct.log.error('{} {} generated an exception: {}'.format(
                    subject, arguments, exc))

        compute_time = time.time() - compute_time

        # concatenate all_results into a single pandas DataFrame
        results_dataframe = pd.concat(all_results)

    except KeyboardInterrupt:
        sct.log.warning("\nCaught KeyboardInterrupt, terminating workers")
        for job in future_dirs:
            job.cancel()
    except Exception as e:
        sct.log.error('Error on line {}'.format(sys.exc_info()[-1].tb_lineno))
        sct.log.exception(e)
        for job in future_dirs:
            job.cancel()
        raise
    finally:
        pool.shutdown()

    return {'results': results_dataframe, "compute_time": compute_time}
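
A usage sketch for run_function; the dataset folder, subject name, and arguments below are placeholders drawn from the comment inside the function, not a verified invocation.

# Hypothetical invocation; paths and CLI arguments are illustrative only.
outcome = run_function(
    'sct_propseg',
    os.path.join('data', 'sct_test_function'),
    list_subj=['200_005_s2'],
    list_args=['-i ' + os.path.join('t2', 't2.nii.gz') + ' -c t2'],
    nb_cpu=4,
)
print(outcome['results'])
print('elapsed: {:.1f} s'.format(outcome['compute_time']))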
Example #5
if __name__ == "__main__":
    args = command_line_args()

    threads = args.threads
    if threads < 1:
        raise RuntimeError( "Invalid number of threads: {}".format(threads) )

    # set up the execution pool, use MPI if available and called with mpiexec
    try:
        from mpi4py import MPI
        from mpi4py.futures import MPIPoolExecutor
        print "setting up MPIPoolExecutor, size ",threads
        executor = MPIPoolExecutor(max_workers=threads)
        futures = []
        for edge_dir in args.edge_dirs:
            futures.append( executor.submit( run_func, edge_dir, args ) )

        # wait for all processes to return
        for f in futures:
            if f.result() > 0:
                raise RuntimeError( "mptp has failed! " + f.result() )
    except ImportError:
        from concurrent.futures import ProcessPoolExecutor
        from multiprocessing import Pool
        executor = ProcessPoolExecutor(max_workers=threads)
        pool = Pool(processes=threads)
        results = [pool.apply_async( run_func, args=(edge_dir, args)) for edge_dir in args.edge_dirs]
        for result in results:
            if result.get() > 0:
                raise RuntimeError( "mptp has failed!" )
Example #6
class MPIFuturesInterface(object):
    """
    Class provides an interface to extend the mpi4py.futures concurrency tools for flexible nested parallel
    computations.
    """
    class AsyncResultWrapper(object):
        """
        Once ready() reports completion, get() returns results as a list in the same order as submission.
        """
        def __init__(self, futures):
            """

            :param futures: list of :class:'mpi4py.futures.Future'
            """
            self.futures = futures
            self._ready = False

        def ready(self, wait=None):
            """
            :param wait: int or float
            :return: bool
            """
            time_stamp = time.time()
            if wait is None:
                wait = 0
            while not np.all([future.done() for future in self.futures]):
                if time.time() - time_stamp > wait:
                    return False
            self._ready = True
            return True

        def get(self):
            """
            Returns None until all results have completed, then returns a list of results in the order of original
            submission.
            :return: list
            """
            if self._ready or self.ready():
                return [future.result() for future in self.futures]
            else:
                return None

    def __init__(self, procs_per_worker=1):
        """

        :param procs_per_worker: int
        """
        try:
            from mpi4py import MPI
            from mpi4py.futures import MPIPoolExecutor
        except ImportError:
            raise ImportError(
                'nested: MPIFuturesInterface: problem with importing from mpi4py.futures'
            )
        self.global_comm = MPI.COMM_WORLD
        if procs_per_worker > 1:
            print('nested: MPIFuturesInterface: procs_per_worker reduced to 1; collective operations not yet '
                  'implemented')
        self.procs_per_worker = 1
        self.executor = MPIPoolExecutor()
        self.rank = self.global_comm.rank
        self.global_size = self.global_comm.size
        self.num_workers = self.global_size - 1
        self.apply_counter = 0
        self.map = self.map_sync
        self.apply = self.apply_sync
        self.init_workers(disp=True)

    def init_workers(self, disp=False):
        """

        :param disp: bool
        """
        futures = []
        for task_id in range(1, self.global_size):
            futures.append(
                self.executor.submit(mpi_futures_init_worker, task_id, disp))
        results = [future.result() for future in futures]
        num_returned = len(set(results))
        if num_returned != self.num_workers:
            raise ValueError(
                'nested: MPIFuturesInterface: %i / %i processes returned from init_workers'
                % (num_returned, self.num_workers))
        self.print_info()

    def print_info(self):
        """

        """
        print('nested: MPIFuturesInterface: process id: %i; rank: %i / %i; num_workers: %i' %
              (os.getpid(), self.rank, self.global_size, self.num_workers))
        sys.stdout.flush()
        time.sleep(0.1)

    def apply_sync(self, func, *args, **kwargs):
        """
        mpi4py.futures lacks a native method to guarantee execution of a function on all workers. This method
        implements a synchronous (blocking) apply operation that accepts **kwargs and returns values collected from each
        worker.
        :param func: callable
        :param args: list
        :param kwargs: dict
        :return: dynamic
        """
        apply_key = int(self.apply_counter)
        self.apply_counter += 1
        futures = []
        for rank in range(1, self.global_size):
            futures.append(
                self.executor.submit(mpi_futures_apply_wrapper, func,
                                     apply_key, args, kwargs))
        results = [future.result() for future in futures]
        return results

    def map_sync(self, func, *sequences):
        """
        This method wraps mpi4py.futures.MPIPoolExecutor.map to implement a synchronous (blocking) map operation.
        Uses all available processes, and returns results as a list in the same order as the specified sequences.
        :param func: callable
        :param sequences: list
        :return: list
        """
        if not sequences:
            return None
        results = []
        for result in self.executor.map(func, *sequences):
            results.append(result)
        return results

    def map_async(self, func, *sequences):
        """
        This method wraps mpi4py.futures.MPIPoolExecutor.submit to implement an asynchronous (non-blocking) map
        operation. Uses all available processes, and returns results as a list in the same order as the specified
        sequences. Returns an AsyncResultWrapper object to track progress of the submitted jobs.
        :param func: callable
        :param sequences: list
        :return: list
        """
        if not sequences:
            return None
        futures = []
        for args in zip(*sequences):
            futures.append(self.executor.submit(func, *args))
        return self.AsyncResultWrapper(futures)

    def get(self, object_name):
        """
        mpi4py.futures lacks a native method to get the value of an object from all workers. This method implements a
        synchronous (blocking) pull operation.
        :param object_name: str
        :return: dynamic
        """
        return self.apply_sync(find_nested_object, object_name)

    def start(self, disp=False):
        pass

    def stop(self):
        self.executor.shutdown()

    def ensure_controller(self):
        """
        Exceptions in python on an MPI rank are not enough to end a job. Strange behavior results when an unhandled
        Exception occurs on an MPI rank while under the control of an mpi4py.futures.MPIPoolExecutor. This method will
        hard exit python if executed by any rank other than the master.
        """
        if self.rank != 0:
            os._exit(1)
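
A usage sketch for MPIFuturesInterface; the squared-number task and the launch command are illustrative, and assume the helper functions referenced above (mpi_futures_init_worker, mpi_futures_apply_wrapper, find_nested_object) are importable from the original module.

def square(x):
    return x * x


if __name__ == '__main__':
    # Typically launched as: mpiexec -n 5 python -m mpi4py.futures this_script.py
    interface = MPIFuturesInterface()
    print(interface.map(square, range(10)))        # blocking map_sync
    async_result = interface.map_async(square, range(10))
    while not async_result.ready(wait=0.1):
        pass                                       # poll the wrapped futures
    print(async_result.get())
    interface.stop()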