Example #1
class ComputeDevicePool:
    def __init__(self,
                 compute_devices:
                 tp.Optional[tp.Iterable[tp.Union[int, ComputeDevice]]] = None,
                 compute_device_filter:
                 tp.Optional[ComputeDeviceFilter] = exclude_intel_devices,
                 multiprocessing_pool_type: MultiprocessingPoolType = MultiprocessingPoolType.default()) \
            -> None:
        """
        This method constructs a compute device pool from a collection of
        individual devices.

        :param compute_devices: a collection of device ids or compute devices
        :param compute_device_filter: provide a predicate used to filter devices 
                                      to include in the pool
        :param multiprocessing_pool_type: the type of multi-processing pool 
                                          (see class MultiprocessingPoolType)
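
        Example (a minimal sketch; it assumes that at least one supported
        compute device is detected by ComputeDeviceManager)::

            >>> pool = ComputeDevicePool()                     # all detected devices
            >>> pool = ComputeDevicePool(compute_devices=[0],  # a single device by id
            ...                          compute_device_filter=None)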

        """
        if compute_devices is None:
            compute_devices = ComputeDeviceManager.get_compute_devices()

        self._compute_devices \
            = _get_set_of_compute_devices_from_iterable(compute_devices)

        if compute_device_filter is not None:
            # Keep only the devices accepted by the filter predicate
            # (by default this excludes Intel devices).
            self._compute_devices = \
                frozenset(filter(compute_device_filter,
                                 self._compute_devices))

        # ctx = multiprocessing.get_context("spawn")
        # self._executor = ProcessPoolExecutor(max_workers=self._n_gpus,
        #                                      mp_context=ctx)

        if multiprocessing_pool_type == MultiprocessingPoolType.LOKY:
            from loky import get_reusable_executor, wait

            self._executor = get_reusable_executor(
                max_workers=self.number_of_devices,
                timeout=None,
                context='loky')

            futures = [
                self._executor.submit(_init_gpu_in_process,
                                      device_id=compute_device.id)
                for compute_device in self._compute_devices
            ]

            wait(futures)

            # Propagate any exception raised during per-device initialization.
            for future in futures:
                future.result()
        elif multiprocessing_pool_type == MultiprocessingPoolType.PATHOS:
            from pathos.pools import ProcessPool

            self._executor = ProcessPool(nodes=self.number_of_devices)
            futures = [
                self._executor.apipe(_init_gpu_in_process,
                                     device_id=compute_device.id)
                for compute_device in self._compute_devices
            ]

            # Block until every per-device initialization has completed and
            # propagate any exception raised in the worker processes.
            for future in futures:
                future.get()
        else:
            raise ValueError(
                f'Multiprocessing pool type {multiprocessing_pool_type} not supported'
            )

        self._multiprocessing_pool_type = multiprocessing_pool_type

    @property
    def compute_devices(self) -> tp.FrozenSet[ComputeDevice]:
        return self._compute_devices

    @property
    def number_of_devices(self) -> int:
        return len(self.compute_devices)

    @property
    def multiprocessing_pool_type(self) -> MultiprocessingPoolType:
        return self._multiprocessing_pool_type

    def sync(self):
        for compute_device in self._compute_devices:
            compute_device.sync()

    def map_reduce(
            self,
            f: tp.Callable[..., ResultType],
            reduction: tp.Callable[[ResultType, ResultType], ResultType],
            initial_value: ResultType,
            host_to_device_transfer_function:
            tp.Optional[ParameterTransferFunction] = None,
            device_to_host_transfer_function:
            tp.Optional[tp.Callable[[ResultType], ResultType]] = None,
            args_list: tp.Optional[tp.Sequence[tp.Sequence]] = None,
            kwargs_list: tp.Optional[tp.Sequence[tp.Dict[str, tp.Any]]] = None,
            number_of_batches: tp.Optional[int] = None) \
            -> ResultType:
        """
        This method evaluates the function 'f' on elements of 'args_list' and
        'kwargs_list' in parallel on multiple devices and performs the reduction
        by calling the function 'reduction' on each new result and the result of
        the reductions so far, eventually producing one final result of type
        'ResultType'. The reduce step is performed from the left and results are
        processed in the same order as they appear in 'args_list' and
        'kwargs_list'.
    
        Input data to the function f must initially reside in host memory and 
        the user must provide functions 'host_to_device_transfer_function' and 
        'device_to_host_transfer_function' to transfer the data to and results 
        from device memory respectively.
    
        If the arguments for each run of 'f' are identical and have already been
        bound to the function that is passed, then 'args_list' and 'kwargs_list'
        may both be None, but the argument 'number_of_batches' must be specified
        so the method knows how many times to run the function 'f'.

        Args:
            f: The map function to be evaluated over elements of 'args_list' and 
               'kwargs_list'.
               
            reduction: The reduction to be performed on the results of 'f'. 
                       This is done on the host (not the device).
                       
            initial_value: The initial value of the reduction 
                           (i.e. the neutral element).
                           
            host_to_device_transfer_function: 
                A function that transfers elements of args_list and kwargs_list 
                from host memory to device memory.
                
            device_to_host_transfer_function: 
                A function that transfers results from device to host memory.
                
            args_list: A sequence of sequences of positional arguments.
            kwargs_list: A sequence of dictionaries of keyword arguments.
            number_of_batches:
                The number of function evaluations; required if 'args_list'
                and 'kwargs_list' are both None.
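
        Example:
            A minimal sketch (assumes a pool 'pool' has been constructed as
            above and that 'f' needs no device transfer functions)::

                >>> pool.map_reduce(lambda x: x * x,
                ...                 reduction=lambda a, b: a + b,
                ...                 initial_value=0,
                ...                 args_list=[(i,) for i in range(4)])
                14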

        """

        args_list, kwargs_list, number_of_batches = \
            _extract_arguments_and_number_of_batches(
                args_list=args_list,
                kwargs_list=kwargs_list,
                number_of_batches=number_of_batches)

        # Wrapper run in a worker process: it transfers arguments to the device
        # (if requested), evaluates 'f', transfers the result back, and tags the
        # result with its index so the original order can be restored later.
        def synced_f(index, *args, **kwargs) -> tp.Tuple[int, ResultType]:
            if host_to_device_transfer_function is not None:
                args, kwargs = host_to_device_transfer_function(
                    *args, **kwargs)
            sync()
            result = f(*args, **kwargs)
            if device_to_host_transfer_function is not None:
                result = device_to_host_transfer_function(result)
            sync()
            return index, result

        results = []
        if self.multiprocessing_pool_type == MultiprocessingPoolType.LOKY:
            from loky import as_completed

            futures = [
                self._executor.submit(synced_f, i, *args, **kwargs)
                for i, (args, kwargs) in enumerate(zip(args_list, kwargs_list))
            ]

            for future in as_completed(futures):
                results.append(future.result())
                # result = reduction(result, future.result())
        elif self.multiprocessing_pool_type == MultiprocessingPoolType.PATHOS:
            futures = [
                self._executor.apipe(synced_f, i, *args, **kwargs)
                for i, (args, kwargs) in enumerate(zip(args_list, kwargs_list))
            ]

            for future in futures:
                results.append(future.get())
                # result = reduction(result, future.get())
        else:
            raise ValueError(
                f'Multiprocessing pool type {self.multiprocessing_pool_type} not supported'
            )

        # Results may have been collected out of order (e.g. via as_completed);
        # restore the submission order and drop the index tags before reducing.
        results = sorted(results, key=lambda x: x[0])
        results = [result[1] for result in results]

        result = initial_value
        for new_result in results:
            result = reduction(result, new_result)

        return result

    def map_combine(
            self,
            f: tp.Callable[..., ResultType],
            combination: tp.Callable[[tp.Iterable[ResultType]], ResultType],
            host_to_device_transfer_function:
            tp.Optional[ParameterTransferFunction] = None,
            device_to_host_transfer_function:
            tp.Optional[tp.Callable[[ResultType], ResultType]] = None,
            args_list: tp.Optional[tp.Sequence[tp.Sequence]] = None,
            kwargs_list: tp.Optional[tp.Sequence[tp.Dict[str, tp.Any]]] = None,
            number_of_batches: tp.Optional[int] = None) -> ResultType:
        """
        This method evaluates the function `f` on elements of `args_list` and 
        `kwargs_list` in parallel on multiple devices and aggregates results 
        in a single step by calling the function `combination` with a list of all 
        results. Results provided to `combination` are in the same order as 
        they appear in `args_list` and `kwargs_list`. 
    
        Input data to the function f must initially reside in host memory and 
        the user must provide functions 'host_to_device_transfer_function' and 
        'device_to_host_transfer_function' to transfer the data to and results 
        from device memory respectively.
    
        If the arguments for each run of 'f' are identical and have already been
        bound to the function that is passed, then 'args_list' and 'kwargs_list'
        may both be None, but the argument 'number_of_batches' must be specified
        so the method knows how many times to run the function 'f'.

        Args:
            f: The map function to be evaluated over elements of 'args_list' and 
               'kwargs_list'.
               
            combination:
                A function that aggregates the list of all results in a single
                step.
                
            host_to_device_transfer_function: 
                A function that transfers elements of args_list and kwargs_list 
                from host memory to device memory.
                
            device_to_host_transfer_function:
                 A function that transfers results from device to host memory.
                 
            args_list: A sequence of sequences of positional arguments.
            kwargs_list: A sequence of dictionaries of keyword arguments.
            number_of_batches:
                The number of function evaluations; required if 'args_list'
                and 'kwargs_list' are both None.
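
        Example:
            A minimal sketch (assumes a pool 'pool' has been constructed as
            above and that 'f' needs no device transfer functions)::

                >>> pool.map_combine(lambda x: x + 1,
                ...                  combination=list,
                ...                  args_list=[(i,) for i in range(3)])
                [1, 2, 3]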
        """
        args_list, kwargs_list, number_of_batches = \
            _extract_arguments_and_number_of_batches(
                args_list=args_list,
                kwargs_list=kwargs_list,
                number_of_batches=number_of_batches)

        # Wrapper run in a worker process: it transfers arguments to the device
        # (if requested), evaluates 'f', transfers the result back, and tags the
        # result with its index so the original order can be restored later.
        def synced_f(index, *args, **kwargs) -> tp.Tuple[int, ResultType]:
            if host_to_device_transfer_function is not None:
                args, kwargs = host_to_device_transfer_function(
                    *args, **kwargs)
            sync()
            result = f(*args, **kwargs)
            if device_to_host_transfer_function is not None:
                result = device_to_host_transfer_function(result)
            sync()
            return index, result

        results = []
        if self.multiprocessing_pool_type == MultiprocessingPoolType.LOKY:
            from loky import as_completed

            futures = [
                self._executor.submit(synced_f, i, *args, **kwargs)
                for i, (args, kwargs) in enumerate(zip(args_list, kwargs_list))
            ]

            for future in as_completed(futures):
                results.append(future.result())
        elif self.multiprocessing_pool_type == MultiprocessingPoolType.PATHOS:
            futures = [
                self._executor.apipe(synced_f, i, *args, **kwargs)
                for i, (args, kwargs) in enumerate(zip(args_list, kwargs_list))
            ]

            for future in futures:
                results.append(future.get())
        else:
            raise ValueError(
                f'Multiprocessing pool type {self.multiprocessing_pool_type} not supported'
            )

        # Results may have been collected out of order (e.g. via as_completed);
        # restore the submission order and drop the index tags before combining.
        results = sorted(results, key=lambda x: x[0])
        results = [result[1] for result in results]

        return combination(results)
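

# Usage sketch (not part of the class above): this assumes that at least one
# supported compute device is detected and that the mapped function needs no
# explicit host/device transfer functions, so those arguments are left at
# their defaults.
if __name__ == '__main__':
    pool = ComputeDevicePool()

    # Square each input on a device and sum the partial results on the host.
    total = pool.map_reduce(
        f=lambda x: x * x,
        reduction=lambda acc, value: acc + value,
        initial_value=0,
        args_list=[(i,) for i in range(8)])

    print(f'sum of squares on {pool.number_of_devices} device(s): {total}')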