Example #1
# Imports used by this example; Scheduler is assumed to be the project's own
# base class that validates the core/memory bounds and the adapt() arguments.
from dask.distributed import Client
from dask_jobqueue import LSFCluster


class LSFScheduler(Scheduler):

    # Constructor
    def __init__(self,
                 min_cores=1,
                 max_cores=1,
                 min_memory='1 GB',
                 max_memory='1 GB',
                 processes=1,
                 walltime='02:00',
                 **kwargs):
        # Call parent constructor
        super().__init__(min_cores=min_cores,
                         max_cores=max_cores,
                         min_memory=min_memory,
                         max_memory=max_memory)
        # Define cluster default parameters
        self.cluster_kwargs = {
            'memory': max_memory,
            'cores': min_cores,
            'processes': processes,
            'walltime': walltime,
            **kwargs
        }

    # Define adapt method
    def adapt(self, minimum, maximum, **kwargs):
        # Merge kwargs with default kwargs
        kwargs = {**self.cluster_kwargs, **kwargs}
        # Call parent adapt method (check values)
        super().adapt(minimum, maximum, **kwargs)
        # Make new cluster
        self._cluster = LSFCluster(**kwargs)
        # Make client
        self._client = Client(self._cluster)
        # Adapt cluster
        self._cluster.adapt(minimum=minimum, maximum=maximum)
        # Return client reference
        return self._client
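
A minimal usage sketch for the class above (not taken from the source project): the resource values are hypothetical placeholders, queue is a standard dask_jobqueue.LSFCluster keyword passed through **kwargs, and Client.map/Client.gather are standard dask API.

# Hedged usage sketch for LSFScheduler; all resource values are hypothetical.
scheduler = LSFScheduler(min_cores=1,
                         max_cores=4,
                         min_memory='4 GB',
                         max_memory='16 GB',
                         processes=1,
                         walltime='04:00',
                         queue='normal')  # extra kwargs are forwarded to LSFCluster
# adapt() builds the LSFCluster/Client pair and scales it between 1 and 10 workers
client = scheduler.adapt(minimum=1, maximum=10)
# Submit work through the returned dask client
futures = client.map(lambda x: x ** 2, range(100))
print(sum(client.gather(futures)))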
Example #2
    def activate_client(self,
                        library=('dask', 'LSF'),
                        num_processes=2,
                        timeout=1800):
        """
        Parameters
        ----------
        library : tuple(str, str), default ('dask', 'LSF')
            parallelism and scheduler tuple
        num_processes : int or None
            number of workers to run with the new client
            if None, num_processes will be adaptive
        timeout : int
            number of seconds to wait to fulfill the workers order
        """
        self.library = library
        if library is not None:
            _logger.debug(f"library is not None")
            assert library[0] in list(
                self.supported_libraries.keys()
            ), f"{library[0]} is not a supported parallelism. (supported parallelisms are {self.supported_libraries.keys()})"
            assert library[1] in list(
                self.supported_libraries[library[0]]
            ), f"{library[1]} is not a supported . (supported parallelisms are {self.supported_libraries[library[0]]})"
        elif library is None:
            _logger.debug(f"library is None")
            self.client = None
            self._adapt = False
            self.num_processes = 0
            self.workers = {}
            return

        if library[0] == 'dask':
            _logger.debug(f"detected dask parallelism...")
            if library[1] == 'LSF':
                _logger.debug(f"detected LSF scheduler")
                from dask_jobqueue import LSFCluster
                _logger.debug(f"creating cluster...")
                cluster = LSFCluster()
                if num_processes is None:
                    _logger.debug(f"adaptive cluster")
                    self._adapt = True
                    cluster.adapt(minimum=1, interval='1s')
                else:
                    _logger.debug(f"nonadaptive cluster")
                    self._adapt = False
                    self.num_processes = num_processes
                    cluster.scale(self.num_processes)

                _logger.debug(f"creating client with cluster")
                self.client = distributed.Client(cluster, timeout=timeout)
                if not self._adapt:
                    while len(self.client.nthreads()) != self.num_processes:
                        _logger.debug(
                            f"waiting for worker request fulfillment...")
                        time.sleep(5)
                worker_threads = self.client.nthreads()
                # Map a simple integer index to each dask worker address
                self.workers = {
                    i: _worker
                    for i, _worker in enumerate(worker_threads.keys())
                }
                _logger.debug(f"workers initialized: {self.workers}")
            else:
                raise Exception(
                    f"{library[1]} is supported, but without client-activation functionality!"
                )
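
A hedged sketch of how the workers mapping built by activate_client might be consumed afterwards; engine stands in for an instance of the (unshown) class that defines the method and run_task is a placeholder, while Client.submit(..., workers=...) and Client.gather are standard distributed API.

# Hedged usage sketch; 'engine' is assumed to be an instance of the class that
# defines activate_client() (that class is not shown in this example).
engine.activate_client(library=('dask', 'LSF'), num_processes=2)

def run_task(payload):
    return payload * 2  # placeholder computation

# Pin one task to each worker address recorded in engine.workers
futures = [
    engine.client.submit(run_task, idx, workers=[address], pure=False)
    for idx, address in engine.workers.items()
]
results = engine.client.gather(futures)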