def __init__(self, result_container=None, verbose=False, n_processes=1,
             source_paths=None, python_executable=None,
             cache_callable=True):
    """Initialize the scheduler and start the slave processes.

    result_container -- ResultContainer used to store the results.
    verbose -- Set to True to get progress reports from the scheduler
        (default value is False).
    n_processes -- Number of processes used in parallel (default is 1).
        If None, the number of detected CPU cores is used.
    source_paths -- List of paths that are added to sys.path in the
        processes to make the task unpickling work. A single path
        instead of a list is also accepted. If None (default value)
        then source_paths is set to sys.path. To prevent this you can
        specify an empty list.
    python_executable -- Python executable that is used for the
        processes. The default value is None, in which case
        sys.executable is used.
    cache_callable -- Cache the task objects in the processes
        (default is True). Disabling caching can reduce the memory
        usage, but is generally less efficient since the task_callable
        has to be pickled each time.
    """
    super(ProcessScheduler, self).__init__(
        result_container=result_container, verbose=verbose)
    if n_processes:
        self._n_processes = n_processes
    else:
        self._n_processes = cpu_count()
    self._cache_callable = cache_callable
    if python_executable is None:
        python_executable = sys.executable
    # get the location of this module to start the processes
    module_path = os.path.dirname(mdp.__file__)
    module_file = os.path.join(module_path, "parallel",
                               "process_schedule.py")
    # Note: the -u argument is important on Windows to set stdout to
    # binary mode. Otherwise you might get a strange error message
    # for copy_reg.
    process_args = [python_executable, "-u", module_file]
    process_args.append(str(self._cache_callable))
    if isinstance(source_paths, str):
        source_paths = [source_paths]
    if source_paths is None:
        source_paths = sys.path
    process_args += source_paths
    # list of processes not in use; start the processes now
    self._free_processes = [subprocess.Popen(args=process_args,
                                             stdout=subprocess.PIPE,
                                             stdin=subprocess.PIPE)
                            for _ in range(self._n_processes)]
    # Tag each process with its cached callable task_index. This is
    # compared with _last_callable_index to check if the cached
    # task_callable is still up to date.
    for process in self._free_processes:
        process._callable_index = -1
    if self.verbose:
        print("scheduler initialized with %d processes"
              % self._n_processes)
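A minimal usage sketch (not part of the original source), assuming the standard mdp.parallel scheduler interface with add_task(), get_results() and shutdown(); the task callable is defined at module level so it can be pickled and shipped to the slave processes:

import mdp

def square(x):
    # Module-level function: picklable, so it can be sent to the processes.
    return x * x

if __name__ == "__main__":
    # Hypothetical example values; n_processes=2 starts two slave processes.
    scheduler = mdp.parallel.ProcessScheduler(n_processes=2, verbose=True)
    try:
        for i in range(10):
            scheduler.add_task(i, square)
        # Blocks until all open tasks are finished, then returns the results.
        results = scheduler.get_results()
    finally:
        # Always shut the scheduler down so the slave processes exit.
        scheduler.shutdown()
    print(results)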
def test_cpu_count():
    """Test the cpu_count helper function."""
    n_cpus = parallel.cpu_count()
    assert isinstance(n_cpus, int)