def _wait_futures(self, data):
    logger.info('Reduce function: waiting for map results')
    fut_list = data['results']
    # Block until all map futures complete and download their outputs
    wait(fut_list, self.internal_storage, download_results=True)
    # Keep only results from finished futures that did not spawn sub-futures
    results = [f.result() for f in fut_list if f.done and not f.futures]
    fut_list.clear()
    data['results'] = results
def map_reduce(self, map_function, map_iterdata, reduce_function,
               chunksize=None, worker_processes=None, extra_args=None,
               extra_env=None, map_runtime_memory=None, obj_chunk_size=None,
               obj_chunk_number=None, reduce_runtime_memory=None,
               chunk_size=None, chunk_n=None, timeout=None,
               invoke_pool_threads=None, reducer_one_per_object=False,
               reducer_wait_local=False, include_modules=[], exclude_modules=[]):
    """
    Map the map_function over the data and apply the reduce_function
    across all futures. This method is executed entirely in the cloud.

    :param map_function: The function to map over the data.
    :param map_iterdata: An iterable of input data.
    :param reduce_function: The function to reduce over the futures.
    :param chunksize: Split map_iterdata in chunks of this size.
        Lithops spawns 1 worker per resulting chunk. Default 1.
    :param worker_processes: Number of concurrent/parallel processes
        in each worker. Default 1.
    :param extra_args: Additional arguments to pass to each function
        activation. Default None.
    :param extra_env: Additional environment variables for the action
        environment. Default None.
    :param map_runtime_memory: Memory to use to run the map function.
        Default None (loaded from config).
    :param reduce_runtime_memory: Memory to use to run the reduce function.
        Default None (loaded from config).
    :param obj_chunk_size: The size of the data chunks to split each object.
        'None' for processing the whole file in one function activation.
    :param obj_chunk_number: Number of chunks to split each object.
        'None' for processing the whole file in one function activation.
    :param timeout: Time that the functions have to complete their
        execution before raising a timeout.
    :param reducer_one_per_object: Set one reducer per object after
        running the partitioner.
    :param reducer_wait_local: Wait for the map results locally before
        invoking the reducer.
    :param invoke_pool_threads: Number of threads to use for invocation.
    :param include_modules: Explicitly pickle these dependencies.
    :param exclude_modules: Explicitly keep these modules from pickled
        dependencies.

    :return: A list of futures: the map futures followed by the
        reduce future(s).
    """
    self.last_call = 'map_reduce'
    map_job_id = self._create_job_id('M')

    runtime_meta = self.invoker.select_runtime(map_job_id, map_runtime_memory)

    map_job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, map_job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             worker_processes=worker_processes,
                             runtime_meta=runtime_meta,
                             runtime_memory=map_runtime_memory,
                             extra_args=extra_args,
                             extra_env=extra_env,
                             chunk_size=chunk_size,
                             chunk_n=chunk_n,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             invoke_pool_threads=invoke_pool_threads)

    map_futures = self.invoker.run_job(map_job)
    self.futures.extend(map_futures)

    if reducer_wait_local:
        wait(fs=map_futures, internal_storage=self.internal_storage,
             job_monitor=self.job_monitor)

    reduce_job_id = map_job_id.replace('M', 'R')

    runtime_meta = self.invoker.select_runtime(reduce_job_id, reduce_runtime_memory)

    reduce_job = create_reduce_job(self.config, self.internal_storage,
                                   self.executor_id, reduce_job_id,
                                   reduce_function, map_job, map_futures,
                                   runtime_meta=runtime_meta,
                                   runtime_memory=reduce_runtime_memory,
                                   reducer_one_per_object=reducer_one_per_object,
                                   extra_env=extra_env,
                                   include_modules=include_modules,
                                   exclude_modules=exclude_modules)

    reduce_futures = self.invoker.run_job(reduce_job)
    self.futures.extend(reduce_futures)

    # Map results are consumed by the reducer; don't download them again
    for f in map_futures:
        f._produce_output = False

    return map_futures + reduce_futures
def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED,
         download_results=False, timeout=None,
         threadpool_size=THREADPOOL_SIZE, wait_dur_sec=WAIT_DUR_SEC):
    """
    Wait for the Future instances (possibly created by different Executor
    instances) given by fs to complete. Returns a named 2-tuple of sets.
    The first set, named done, contains the futures that completed
    (finished or cancelled futures) before the wait completed. The second
    set, named not_done, contains the futures that did not complete
    (pending or running futures). timeout can be used to control the
    maximum number of seconds to wait before returning.

    :param fs: Futures list. Default None.
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`.
    :param download_results: Download results. Default False (only get statuses).
    :param timeout: Timeout of waiting for results.
    :param threadpool_size: Number of threads to use. Default 64.
    :param wait_dur_sec: Time interval between each check.

    :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures
        that have completed and `fs_notdone` is a list of futures that
        have not completed.
    :rtype: 2-tuple of list
    """
    futures = fs or self.futures
    if type(futures) != list:
        futures = [futures]

    # Start waiting for results
    try:
        wait(fs=futures,
             internal_storage=self.internal_storage,
             job_monitor=self.job_monitor,
             download_results=download_results,
             throw_except=throw_except,
             return_when=return_when,
             timeout=timeout,
             threadpool_size=threadpool_size,
             wait_dur_sec=wait_dur_sec)

    except Exception as e:
        self.invoker.stop()
        # Drop the futures added by this call if running in a notebook
        if not fs and is_notebook():
            del self.futures[len(self.futures) - len(futures):]
        if self.data_cleaner and not self.is_lithops_worker:
            self.clean(clean_cloudobjects=False, force=True)
        raise e

    finally:
        present_jobs = {f.job_key for f in futures}
        self.job_monitor.stop(present_jobs)
        if self.data_cleaner and not self.is_lithops_worker:
            self.clean(clean_cloudobjects=False)

    if download_results:
        fs_done = [f for f in futures if f.done]
        fs_notdone = [f for f in futures if not f.done]
    else:
        # Without downloaded results, a future whose success status has
        # been received also counts as done
        fs_done = [f for f in futures if f.success or f.done]
        fs_notdone = [f for f in futures if not f.success and not f.done]

    return fs_done, fs_notdone
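# Usage sketch for wait(): block until all map invocations finish and
# fetch their outputs. `my_map_function` and the input list are
# hypothetical placeholders.
import lithops

def my_map_function(x):
    return x * 2

fexec = lithops.FunctionExecutor()
futures = fexec.map(my_map_function, [1, 2, 3])
fs_done, fs_notdone = fexec.wait(futures, download_results=True)
print([f.result() for f in fs_done])  # [2, 4, 6]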
def wait(
    self,
    fs: Optional[Union[ResponseFuture, FuturesList, List[ResponseFuture]]] = None,
    throw_except: Optional[bool] = True,
    return_when: Optional[Any] = ALL_COMPLETED,
    download_results: Optional[bool] = False,
    timeout: Optional[int] = None,
    threadpool_size: Optional[int] = THREADPOOL_SIZE,
    wait_dur_sec: Optional[int] = WAIT_DUR_SEC
) -> Tuple[FuturesList, FuturesList]:
    """
    Wait for the Future instances (possibly created by different Executor
    instances) given by fs to complete. Returns a named 2-tuple of sets.
    The first set, named done, contains the futures that completed
    (finished or cancelled futures) before the wait completed. The second
    set, named not_done, contains the futures that did not complete
    (pending or running futures). timeout can be used to control the
    maximum number of seconds to wait before returning.

    :param fs: Futures list. Default None
    :param throw_except: Re-raise exception if call raised. Default True
    :param return_when: Percentage of done futures to wait for before
        returning; `ALL_COMPLETED` waits for all of them
    :param download_results: Download results. Default False (only get statuses)
    :param timeout: Timeout of waiting for results
    :param threadpool_size: Number of threads to use. Default 64
    :param wait_dur_sec: Time interval between each check

    :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures
        that have completed and `fs_notdone` is a list of futures that
        have not completed.
    """
    futures = fs or self.futures
    if type(futures) != list and type(futures) != FuturesList:
        futures = [futures]

    # Start waiting for results
    try:
        wait(fs=futures,
             internal_storage=self.internal_storage,
             job_monitor=self.job_monitor,
             download_results=download_results,
             throw_except=throw_except,
             return_when=return_when,
             timeout=timeout,
             threadpool_size=threadpool_size,
             wait_dur_sec=wait_dur_sec)

        if self.data_cleaner and return_when == ALL_COMPLETED:
            present_jobs = {f.job_key for f in futures}
            self.compute_handler.clear(present_jobs)
            self.clean(clean_cloudobjects=False)

    except (KeyboardInterrupt, Exception) as e:
        self.invoker.stop()
        self.job_monitor.stop()
        # Drop the futures added by this call if running in a notebook
        if not fs and is_notebook():
            del self.futures[len(self.futures) - len(futures):]
        if self.data_cleaner:
            present_jobs = {f.job_key for f in futures}
            self.compute_handler.clear(present_jobs)
            self.clean(clean_cloudobjects=False, force=True)
        raise e

    if download_results:
        fs_done = [f for f in futures if f.done]
        fs_notdone = [f for f in futures if not f.done]
    else:
        fs_done = [f for f in futures if f.success or f.done]
        fs_notdone = [f for f in futures if not f.success and not f.done]

    return create_futures_list(fs_done, self), create_futures_list(fs_notdone, self)
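# Usage sketch for incremental result handling with return_when. The
# import path for ANY_COMPLETED and the workload below are assumptions;
# adjust them to your Lithops version.
import lithops
from lithops.wait import ANY_COMPLETED

def my_map_function(x):  # hypothetical workload
    return x * x

fexec = lithops.FunctionExecutor()
pending = fexec.map(my_map_function, range(10))
while pending:
    # Return as soon as at least one pending future completes
    done, pending = fexec.wait(pending, return_when=ANY_COMPLETED,
                               download_results=True)
    for f in done:
        print(f.result())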