Code example #1
def main(ns):

    # compute the tasks
    boxes = list(range(ns.start, ns.stop, ns.step))
    
    # initialize the algorithm wrapper
    bianchi = BianchiWrapper(ns.config)
    
    # initialize the worker pool
    kws = {'comm':comm, 'use_all_cpus':ns.use_all_cpus, 'debug':ns.debug}
    manager = TaskManager(bianchi, ns.N, **kws)
       
    # do the work
    results = manager.compute(boxes)
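
This snippet assumes an argparse-style namespace `ns` and a module-level MPI communicator `comm` that the excerpt does not show, along with `BianchiWrapper` and `TaskManager` from the surrounding project. Below is a minimal sketch of how the missing pieces might be wired up; the argument names simply mirror the attributes the snippet reads and are otherwise hypothetical.

import argparse
from mpi4py import MPI

comm = MPI.COMM_WORLD  # the communicator the snippet's `kws` dict refers to

parser = argparse.ArgumentParser()
parser.add_argument('start', type=int, help='first box number')
parser.add_argument('stop', type=int, help='one past the last box number')
parser.add_argument('step', type=int, help='stride between box numbers')
parser.add_argument('config', type=str, help='template configuration file')
parser.add_argument('N', type=int, help='number of ranks per worker')
parser.add_argument('--use_all_cpus', action='store_true')
parser.add_argument('--debug', action='store_true')

ns = parser.parse_args()  # e.g. `python run.py 0 10 2 params.yaml 4`
main(ns)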
Code example #2
File: nbkit-batch.py  Project: rainwoodman/nbodykit
    def __init__(self, comm, 
                       algorithm_name, 
                       config, 
                       cpus_per_worker, 
                       task_dims, 
                       task_values, 
                       log_level=logging.INFO, 
                       extras={},
                       use_all_cpus=False):
        """
        Parameters
        ----------
        comm : MPI communicator
            the global communicator that will be split and divided
            among the independent workers
        algorithm_name : str
            the string name of the `Algorithm` we are running
        config : str
            the name of the template configuration file, which
            will be updated for each task that is performed
        cpus_per_worker : int
            the desired number of ranks assigned to each independent
            worker, when iterating over the tasks in parallel
        task_dims : list
            a list of strings specifying the names of the task dimensions -- 
            these specify the string formatting key when updating the config
            template file for each task value
        task_values : list
            a list of tuples specifying the task values which will be iterated 
            over -- each tuple should have the same length as `task_dims`
        log_level : int, optional
            an integer specifying the logging level to use -- default
            is the `INFO` level
        extras : dict, optional
            a dictionary where the values are lists of string replacements, with
            length equal to the total number of tasks -- if the keys are present
            in the config file, the string formatting will update the config
            file with the `ith` element of the list for the `ith` iteration
        use_all_cpus : bool, optional
            if `True`, then do not force each worker group to have 
            exactly `cpus_per_worker` ranks, instead including the remainder 
            as well; default is `False`
        """
        setup_logging(log_level)

        self.algorithm_name  = algorithm_name
        self.algorithm_class = getattr(algorithms, algorithm_name)
        self.template        = os.path.expandvars(open(config, 'r').read())
        self.cpus_per_worker = cpus_per_worker
        self.task_dims       = task_dims
        self.task_values     = task_values
        self.extras          = extras
        
        self.comm      = comm
        self.size      = comm.size
        self.rank      = comm.rank
        
        # initialize the worker pool
        kws = {'comm':self.comm, 'use_all_cpus':use_all_cpus}
        if log_level <= logging.DEBUG: kws['debug'] = True
        self.workers = TaskManager(self.compute_one_task, self.cpus_per_worker, **kws)
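
`TaskManager` receives the global communicator here and, judging by the use of `self.workers.subcomm` later in this file, splits it into independent worker groups of roughly `cpus_per_worker` ranks each. A rough sketch of that splitting idea in plain mpi4py follows; it illustrates the concept only and is not `TaskManager`'s actual implementation.

from mpi4py import MPI

comm = MPI.COMM_WORLD
cpus_per_worker = 4

# ranks with the same color end up in the same worker group
color = comm.rank // cpus_per_worker
subcomm = comm.Split(color, comm.rank)

print("global rank %d -> worker group %d, local rank %d"
      % (comm.rank, color, subcomm.rank))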
Code example #3
File: nbkit-batch.py  Project: mjvakili/nbodykit
    def __init__(self,
                 comm,
                 algorithm_name,
                 config,
                 cpus_per_worker,
                 task_dims,
                 task_values,
                 log_level=logging.INFO,
                 extras={},
                 use_all_cpus=False):
        """
        Parameters
        ----------
        comm : MPI communicator
            the global communicator that will be split and divided
            among the independent workers
        algorithm_name : str
            the string name of the `Algorithm` we are running
        config : str
            the name of the template configuration file, which
            will be updated for each task that is performed
        cpus_per_worker : int
            the desired number of ranks assigned to each independent
            worker, when iterating over the tasks in parallel
        task_dims : list
            a list of strings specifying the names of the task dimensions -- 
            these specify the string formatting key when updating the config
            template file for each task value
        task_values : list
            a list of tuples specifying the task values which will be iterated 
            over -- each tuple should have the same length as `task_dims`
        log_level : int, optional
            an integer specifying the logging level to use -- default
            is the `INFO` level
        extras : dict, optional
            a dictionary where the values are lists of string replacements, with
            length equal to the total number of tasks -- if the keys are present
            in the config file, the string formatting will update the config
            file with the `ith` element of the list for the `ith` iteration
        use_all_cpus : bool, optional
            if `True`, then do not force each worker group to have 
            exactly `cpus_per_worker` ranks, instead including the remainder 
            as well; default is `False`
        """
        setup_logging(log_level)

        self.algorithm_name = algorithm_name
        self.algorithm_class = getattr(algorithms, algorithm_name)
        self.template = os.path.expandvars(open(config, 'r').read())
        self.cpus_per_worker = cpus_per_worker
        self.task_dims = task_dims
        self.task_values = task_values
        self.extras = extras

        self.comm = comm
        self.size = comm.size
        self.rank = comm.rank

        # initialize the worker pool
        kws = {'comm': self.comm, 'use_all_cpus': use_all_cpus}
        if log_level <= logging.DEBUG: kws['debug'] = True
        self.workers = TaskManager(self.compute_one_task, self.cpus_per_worker,
                                   **kws)
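
Both versions of this constructor share a subtle Python pitfall: the `extras={}` default creates a single dictionary shared across every call that relies on the default. It is harmless here because `extras` is only read, never mutated, but the conventional guard avoids the issue entirely; a minimal sketch, not a change made in either project:

class Example(object):
    def __init__(self, extras=None):
        # build a fresh dict per instance instead of sharing one default object
        self.extras = extras if extras is not None else {}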
Code example #4
File: nbkit-batch.py  Project: rainwoodman/nbodykit
class BatchAlgorithmDriver(object):
    """
    Class to facilitate running algorithms in batch mode
    """
    def __init__(self, comm, 
                       algorithm_name, 
                       config, 
                       cpus_per_worker, 
                       task_dims, 
                       task_values, 
                       log_level=logging.INFO, 
                       extras={},
                       use_all_cpus=False):
        """
        Parameters
        ----------
        comm : MPI communicator
            the global communicator that will be split and divided
            among the independent workers
        algorithm_name : str
            the string name of the `Algorithm` we are running
        config : str
            the name of the template configuration file, which
            will be updated for each task that is performed
        cpus_per_worker : int
            the desired number of ranks assigned to each independent
            worker, when iterating over the tasks in parallel
        task_dims : list
            a list of strings specifying the names of the task dimensions -- 
            these specify the string formatting key when updating the config
            template file for each task value
        task_values : list
            a list of tuples specifying the task values which will be iterated 
            over -- each tuple should have the same length as `task_dims`
        log_level : int, optional
            an integer specifying the logging level to use -- default
            is the `INFO` level
        extras : dict, optional
            a dictionary where the values are lists of string replacements, with
            length equal to the total number of tasks -- if the keys are present
            in the config file, the string formatting will update the config
            file with the `ith` element of the list for the `ith` iteration
        use_all_cpus : bool, optional
            if `True`, then do not force each worker group to have 
            exactly `cpus_per_worker` ranks, instead including the remainder 
            as well; default is `False`
        """
        setup_logging(log_level)

        self.algorithm_name  = algorithm_name
        self.algorithm_class = getattr(algorithms, algorithm_name)
        self.template        = os.path.expandvars(open(config, 'r').read())
        self.cpus_per_worker = cpus_per_worker
        self.task_dims       = task_dims
        self.task_values     = task_values
        self.extras          = extras
        
        self.comm      = comm
        self.size      = comm.size
        self.rank      = comm.rank
        
        # initialize the worker pool
        kws = {'comm':self.comm, 'use_all_cpus':use_all_cpus}
        if log_level <= logging.DEBUG: kws['debug'] = True
        self.workers = TaskManager(self.compute_one_task, self.cpus_per_worker, **kws)
                
    @classmethod
    def create(cls, comm=None, desc=None):
        """
        Parse the command-line arguments and return the ``BatchAlgorithmDriver`` instance
        """
        import inspect 
        
        if comm is None: comm = MPI.COMM_WORLD
        args_dict = cls.parse_args(desc)
        args_dict['comm'] = comm
        
        # inspect the __init__ function
        args, varargs, varkw, defaults = inspect.getfullargspec(cls.__init__)[:4]
        
        # determine the required arguments
        args = args[1:] # remove 'self'
        if defaults:
            required = args[:-len(defaults)]
        else:
            required = args
            
        # get the args, kwargs to pass to __init__
        fargs = tuple(args_dict[p] for p in required)
        fkwargs = {}
        if defaults:
            for i, p in enumerate(defaults):
                name = args[-len(defaults)+i]
                fkwargs[name] = args_dict.get(name, defaults[i])
        
        return cls(*fargs, **fkwargs)
        
    @classmethod
    def parse_args(cls, desc=None):
        """
        Parse command-line arguments that are needed to initialize a 
        `BatchAlgorithmDriver` class
        
        Parameters
        ----------
        desc : str, optional
            the description to use for this parser
        """
        import argparse
        import itertools
        
        # parse
        parser = argparse.ArgumentParser(description=desc) 
        
        # first argument is the algorithm name
        h = 'the name of the `Algorithm` to run in batch mode'
        valid_algorithms = list(vars(algorithms))
        parser.add_argument(dest='algorithm_name', choices=valid_algorithms, help=h)  
        
        # the number of independent workers
        h = """the desired number of ranks assigned to each independent
                worker, when iterating over the tasks in parallel""" 
        parser.add_argument('cpus_per_worker', type=int, help=h)
    
        # now do the required named arguments
        required_named = parser.add_argument_group('required named arguments')
        
        # specify the tasks along one dimension 
        h =  """given a string of the format ``key: tasks``, split the string and then
                try to parse the ``tasks``, by first trying to evaluate it, and then
                simply splitting it and interpreting the results as the tasks. 
        
                The general use cases are: 
        
                1) "box: range(2)" -> key = `box`, tasks = `[0, 1]`
                2) "box: [A, B, C]" -> key = `box`, tasks = `['A', 'B', 'C']`
                
                If multiple options are passed with the `-i` flag, then the total 
                set of tasks to perform will be the product of the task lists passed"""
        required_named.add_argument('-i', dest='tasks', action='append', 
                type=tasks_parser, required=True, help=h)
    
        # the template config file
        h = """the name of the template config file (using YAML syntax) that 
                provides the `Algorithm` parameters; the file should use 
                ``string.format`` syntax to indicate which variables will be 
                updated for each task, i.e., an input file could be specified 
                as 'input/DataSource_box{box}.dat', if `box` were one of the task 
                dimensions"""
        required_named.add_argument('-c', '--config', required=True, type=str, help=h)
    
        # read any extra string replacements from file
        h = """file providing extra string replacements, with lines of the form 
                 `tag = ['tag1', 'tag2']`; if the keys match keywords in the 
                 template parameter file, the file will be updated with
                 the `ith` value for the `ith` task"""
        parser.add_argument('--extras', dest='extras', default={}, type=replacements_from_file, help=h)
    
        h = "set the logging output to debug, with lots more info printed"
        parser.add_argument('--debug', help=h, action="store_const", dest="log_level", 
                            const=logging.DEBUG, default=logging.INFO)
                            
        h = "if set, include all available CPUs in the worker pool"
        parser.add_argument('--use_all_cpus', action='store_true', help=h)
                                
        args = parser.parse_args()
        
        # format the tasks, taking the product of multiple task lists
        keys = []; values = []
        for [key, tasks] in args.tasks:
            keys.append(key)
            values.append(tasks)

        # take the product
        if len(keys) > 1:
            values = list(itertools.product(*values))
        else:
            values = values[0]
            
        # save
        args.task_dims = keys
        args.task_values = values
        
        return vars(args)
            
    def compute(self):
        """
        Compute all of the tasks
        """             
        # compute the work
        results = self.workers.compute(self.task_values)
        return results
            
    def compute_one_task(self, itask, task):
        """
        Run the algorithm once, using the parameters specified by `task`,
        which is the `itask` iteration
    
        Parameters
        ----------
        itask : int
            the integer index of this task
        task : tuple
            a tuple of values representing this task value
        """
        # if you are the pool's root, write out the temporary parameter file
        this_config = None
        if self.workers.subcomm.rank == 0:
                
            # initialize a temporary file
            with tempfile.NamedTemporaryFile(delete=False) as ff:
                
                this_config = ff.name
                logger.debug("creating temporary file: %s" %this_config)
                
                # key/values for this task 
                if len(self.task_dims) == 1:
                    possible_kwargs = {self.task_dims[0] : task}
                else:
                    possible_kwargs = dict(zip(self.task_dims, task))
                    
                # any extra key/value pairs for this task
                if self.extras is not None:
                    for k in self.extras:
                        possible_kwargs[k] = self.extras[k][itask]
                        
                # use a custom formatter that only formats the possible keys,
                # ignoring other occurrences of curly brackets
                formatter = Formatter()
                formatter.parse = lambda l: SafeStringParse(formatter, l, list(possible_kwargs))
                kwargs = [kw for _, kw, _, _ in formatter.parse(self.template) if kw]
                        
                # do the string formatting if the key is present in template
                valid = {k:possible_kwargs[k] for k in possible_kwargs if k in kwargs}
                ff.write(formatter.format(self.template, **valid).encode())
        
        # bcast the file name to all in the worker pool
        this_config = self.workers.subcomm.bcast(this_config, root=0)

        # configuration file passed via -c
        params, extra = ReadConfigFile(open(this_config, 'r').read(), self.algorithm_class.schema)
        
        # output is required
        output = getattr(extra, 'output', None)
        if output is None:
            raise ValueError("argument `output` is required in config file")
            
        # initialize the algorithm and run
        alg = self.algorithm_class(**vars(params))
        result = alg.run()
        alg.save(output, result)

        # remove temporary files
        if self.workers.subcomm.rank == 0:
            if os.path.exists(this_config): 
                logger.debug("removing temporary file: %s" %this_config)
                os.remove(this_config)
                
        return 0
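
The task-grid construction at the end of `parse_args` is worth seeing in isolation: each `-i` option contributes one task dimension, and the full set of task values is the Cartesian product of the per-dimension lists. A standalone sketch using the docstring's own examples:

import itertools

# as if invoked with: -i "box: range(2)" -i "los: [x, y, z]"
parsed = [('box', [0, 1]), ('los', ['x', 'y', 'z'])]

keys = [key for key, _ in parsed]
values = [tasks for _, tasks in parsed]

# more than one dimension: take the product; otherwise use the single list
if len(keys) > 1:
    task_values = list(itertools.product(*values))
else:
    task_values = values[0]

print(keys)         # ['box', 'los']
print(task_values)  # [(0, 'x'), (0, 'y'), (0, 'z'), (1, 'x'), (1, 'y'), (1, 'z')]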
Code example #5
File: nbkit-batch.py  Project: mjvakili/nbodykit
class BatchAlgorithmDriver(object):
    """
    Class to facilitate running algorithms in batch mode
    """
    def __init__(self,
                 comm,
                 algorithm_name,
                 config,
                 cpus_per_worker,
                 task_dims,
                 task_values,
                 log_level=logging.INFO,
                 extras={},
                 use_all_cpus=False):
        """
        Parameters
        ----------
        comm : MPI communicator
            the global communicator that will be split and divided
            among the independent workers
        algorithm_name : str
            the string name of the `Algorithm` we are running
        config : str
            the name of the template configuration file, which
            will be updated for each task that is performed
        cpus_per_worker : int
            the desired number of ranks assigned to each independent
            worker, when iterating over the tasks in parallel
        task_dims : list
            a list of strings specifying the names of the task dimensions -- 
            these specify the string formatting key when updating the config
            template file for each task value
        task_values : list
            a list of tuples specifying the task values which will be iterated 
            over -- each tuple should have the same length as `task_dims`
        log_level : int, optional
            an integer specifying the logging level to use -- default
            is the `INFO` level
        extras : dict, optional
            a dictionary where the values are lists of string replacements, with
            length equal to the total number of tasks -- if the keys are present
            in the config file, the string formatting will update the config
            file with the `ith` element of the list for the `ith` iteration
        use_all_cpus : bool, optional
            if `True`, then do not force each worker group to have 
            exactly `cpus_per_worker` ranks, instead including the remainder 
            as well; default is `False`
        """
        setup_logging(log_level)

        self.algorithm_name = algorithm_name
        self.algorithm_class = getattr(algorithms, algorithm_name)
        self.template = os.path.expandvars(open(config, 'r').read())
        self.cpus_per_worker = cpus_per_worker
        self.task_dims = task_dims
        self.task_values = task_values
        self.extras = extras

        self.comm = comm
        self.size = comm.size
        self.rank = comm.rank

        # initialize the worker pool
        kws = {'comm': self.comm, 'use_all_cpus': use_all_cpus}
        if log_level <= logging.DEBUG: kws['debug'] = True
        self.workers = TaskManager(self.compute_one_task, self.cpus_per_worker,
                                   **kws)

    @classmethod
    def create(cls, comm=None, desc=None):
        """
        Parse the command-line arguments and return the ``BatchAlgorithmDriver`` instance
        """
        import inspect

        if comm is None: comm = MPI.COMM_WORLD
        args_dict = cls.parse_args(desc)
        args_dict['comm'] = comm

        # inspect the __init__ function
        args, varargs, varkw, defaults = inspect.getfullargspec(cls.__init__)[:4]

        # determine the required arguments
        args = args[1:]  # remove 'self'
        if defaults:
            required = args[:-len(defaults)]
        else:
            required = args

        # get the args, kwargs to pass to __init__
        fargs = tuple(args_dict[p] for p in required)
        fkwargs = {}
        if defaults:
            for i, p in enumerate(defaults):
                name = args[-len(defaults) + i]
                fkwargs[name] = args_dict.get(name, defaults[i])

        return cls(*fargs, **fkwargs)

    @classmethod
    def parse_args(cls, desc=None):
        """
        Parse command-line arguments that are needed to initialize a 
        `BatchAlgorithmDriver` class
        
        Parameters
        ----------
        desc : str, optional
            the description to use for this parser
        """
        import argparse
        import itertools

        # parse
        parser = argparse.ArgumentParser(description=desc)

        # first argument is the algorithm name
        h = 'the name of the `Algorithm` to run in batch mode'
        valid_algorithms = list(vars(algorithms))
        parser.add_argument(dest='algorithm_name',
                            choices=valid_algorithms,
                            help=h)

        # the number of independent workers
        h = """the desired number of ranks assigned to each independent
                worker, when iterating over the tasks in parallel"""
        parser.add_argument('cpus_per_worker', type=int, help=h)

        # now do the required named arguments
        required_named = parser.add_argument_group('required named arguments')

        # specify the tasks along one dimension
        h = """given a string of the format ``key: tasks``, split the string and then
                try to parse the ``tasks``, by first trying to evaluate it, and then
                simply splitting it and interpreting the results as the tasks. 
        
                The general use cases are: 
        
                1) "box: range(2)" -> key = `box`, tasks = `[0, 1]`
                2) "box: [A, B, C]" -> key = `box`, tasks = `['A', 'B', 'C']`
                
                If multiple options are passed with the `-i` flag, then the total 
                set of tasks to perform will be the product of the task lists passed"""
        required_named.add_argument('-i',
                                    dest='tasks',
                                    action='append',
                                    type=tasks_parser,
                                    required=True,
                                    help=h)

        # the template config file
        h = """the name of the template config file (using YAML syntax) that 
                provides the `Algorithm` parameters; the file should use 
                ``string.format`` syntax to indicate which variables will be 
                updated for each task, i.e., an input file could be specified 
                as 'input/DataSource_box{box}.dat', if `box` were one of the task 
                dimensions"""
        required_named.add_argument('-c',
                                    '--config',
                                    required=True,
                                    type=str,
                                    help=h)

        # read any extra string replacements from file
        h = """file providing extra string replacements, with lines of the form 
                 `tag = ['tag1', 'tag2']`; if the keys match keywords in the 
                 template parameter file, the file will be updated with
                 the `ith` value for the `ith` task"""
        parser.add_argument('--extras',
                            dest='extras',
                            default={},
                            type=replacements_from_file,
                            help=h)

        h = "set the logging output to debug, with lots more info printed"
        parser.add_argument('--debug',
                            help=h,
                            action="store_const",
                            dest="log_level",
                            const=logging.DEBUG,
                            default=logging.INFO)

        h = "if set, include all available CPUs in the worker pool"
        parser.add_argument('--use_all_cpus', action='store_true', help=h)

        args = parser.parse_args()

        # format the tasks, taking the product of multiple task lists
        keys = []
        values = []
        for [key, tasks] in args.tasks:
            keys.append(key)
            values.append(tasks)

        # take the product
        if len(keys) > 1:
            values = list(itertools.product(*values))
        else:
            values = values[0]

        # save
        args.task_dims = keys
        args.task_values = values

        return vars(args)

    def compute(self):
        """
        Compute all of the tasks
        """
        # compute the work
        results = self.workers.compute(self.task_values)
        return results

    def compute_one_task(self, itask, task):
        """
        Run the algorithm once, using the parameters specified by `task`,
        which is the `itask` iteration
    
        Parameters
        ----------
        itask : int
            the integer index of this task
        task : tuple
            a tuple of values representing this task value
        """
        # if you are the pool's root, write out the temporary parameter file
        this_config = None
        if self.workers.subcomm.rank == 0:

            # initialize a temporary file
            with tempfile.NamedTemporaryFile(delete=False) as ff:

                this_config = ff.name
                logger.debug("creating temporary file: %s" % this_config)

                # key/values for this task
                if len(self.task_dims) == 1:
                    possible_kwargs = {self.task_dims[0]: task}
                else:
                    possible_kwargs = dict(zip(self.task_dims, task))

                # any extra key/value pairs for this task
                if self.extras is not None:
                    for k in self.extras:
                        possible_kwargs[k] = self.extras[k][itask]

                # use a custom formatter that only formats the possible keys,
                # ignoring other occurrences of curly brackets
                formatter = Formatter()
                formatter.parse = lambda l: SafeStringParse(
                    formatter, l, list(possible_kwargs))
                kwargs = [
                    kw for _, kw, _, _ in formatter.parse(self.template) if kw
                ]

                # do the string formatting if the key is present in template
                valid = {
                    k: possible_kwargs[k]
                    for k in possible_kwargs if k in kwargs
                }
                ff.write(formatter.format(self.template, **valid).encode())

        # bcast the file name to all in the worker pool
        this_config = self.workers.subcomm.bcast(this_config, root=0)

        # configuration file passed via -c
        params, extra = ReadConfigFile(
            open(this_config, 'r').read(), self.algorithm_class.schema)

        # output is required
        output = getattr(extra, 'output', None)
        if output is None:
            raise ValueError("argument `output` is required in config file")

        # initialize the algorithm and run
        alg = self.algorithm_class(**vars(params))
        result = alg.run()
        alg.save(output, result)

        # remove temporary files
        if self.workers.subcomm.rank == 0:
            if os.path.exists(this_config):
                logger.debug("removing temporary file: %s" % this_config)
                os.remove(this_config)

        return 0
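
The template formatting in `compute_one_task` depends on `SafeStringParse`, which is not included in these excerpts; its evident job is to substitute only the known task keys while leaving every other `{...}` occurrence in the template untouched. A self-contained sketch of one way to get that behavior, assuming nothing about the real helper:

from string import Formatter

class SafeFormatter(Formatter):
    """Substitute only known keys; echo every other field verbatim."""
    def __init__(self, known_keys):
        Formatter.__init__(self)
        self.known_keys = set(known_keys)

    def parse(self, template):
        for literal, field, spec, conv in Formatter.parse(self, template):
            if field is not None and field not in self.known_keys:
                # rebuild the unknown field as plain text, braces and all
                text = '{' + field
                if conv:
                    text += '!' + conv
                if spec:
                    text += ':' + spec
                text += '}'
                yield literal + text, None, None, None
            else:
                yield literal, field, spec, conv

template = "input/DataSource_box{box}.dat, leaving {other} alone"
print(SafeFormatter(['box']).format(template, box=3))
# -> input/DataSource_box3.dat, leaving {other} alone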