def map(self, map_function, map_iterdata, chunksize=None, worker_processes=None,
        extra_args=None, extra_env=None, runtime_memory=None, chunk_size=None,
        chunk_n=None, obj_chunk_size=None, obj_chunk_number=None, timeout=None,
        invoke_pool_threads=None, include_modules=[], exclude_modules=[]):
    """
    Run multiple function executions asynchronously.

    :param map_function: the function to map over the data
    :param map_iterdata: an iterable of input data
    :param chunksize: split map_iterdata in chunks of this size;
           Lithops spawns 1 worker per resulting chunk. Default 1
    :param worker_processes: number of concurrent/parallel processes in
           each worker. Default 1
    :param extra_args: additional args to pass to the function activations
    :param extra_env: additional env variables for the action environment
    :param runtime_memory: memory to use to run the function
    :param chunk_size: forwarded to the job factory alongside
           obj_chunk_size (presumably a legacy counterpart — confirm)
    :param chunk_n: forwarded to the job factory alongside
           obj_chunk_number (presumably a legacy counterpart — confirm)
    :param obj_chunk_size: size of the data chunks to split each object.
           'None' to process the whole file in one function activation
    :param obj_chunk_number: number of chunks to split each object.
           'None' to process the whole file in one function activation
    :param timeout: time the functions have to complete their execution
           before raising a timeout
    :param invoke_pool_threads: number of threads to use to invoke
    :param include_modules: explicitly pickle these dependencies
    :param exclude_modules: explicitly keep these modules out of the
           pickled dependencies

    :return: a list with size `len(iterdata)` of futures
    """
    self.last_call = 'map'
    job_id = self._create_job_id('M')

    # Resolve runtime metadata for the requested memory size before the
    # job payload is built and serialized.
    runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

    job = create_map_job(
        self.config, self.internal_storage, self.executor_id, job_id,
        map_function=map_function,
        iterdata=map_iterdata,
        chunksize=chunksize,
        worker_processes=worker_processes,
        runtime_meta=runtime_meta,
        runtime_memory=runtime_memory,
        extra_env=extra_env,
        include_modules=include_modules,
        exclude_modules=exclude_modules,
        execution_timeout=timeout,
        extra_args=extra_args,
        chunk_size=chunk_size,
        chunk_n=chunk_n,
        obj_chunk_size=obj_chunk_size,
        obj_chunk_number=obj_chunk_number,
        invoke_pool_threads=invoke_pool_threads)

    new_futures = self.invoker.run_job(job)
    self.futures.extend(new_futures)

    return new_futures
def map(self, map_function, map_iterdata, extra_args=None, extra_env=None,
        runtime_memory=None, chunk_size=None, chunk_n=None, timeout=None,
        invoke_pool_threads=500, include_modules=[], exclude_modules=[]):
    """
    For running multiple function executions asynchronously.

    :param map_function: the function to map over the data
    :param map_iterdata: an iterable of input data
    :param extra_args: additional arguments to pass to the function
           activation. Default None
    :param extra_env: additional environment variables for the action
           environment. Default None
    :param runtime_memory: memory to use to run the function.
           Default None (loaded from config)
    :param chunk_size: size of the data chunks to split each object.
           'None' to process the whole file in one function activation
    :param chunk_n: number of chunks to split each object. 'None' to
           process the whole file in one function activation
    :param timeout: time the functions have to complete their execution
           before raising a timeout
    :param invoke_pool_threads: number of threads to use to invoke.
           Default 500
    :param include_modules: explicitly pickle these dependencies
    :param exclude_modules: explicitly keep these modules out of the
           pickled dependencies

    :return: a list with size `len(iterdata)` of futures
    """
    job_id = self._create_job_id('M')
    self.last_call = 'map'

    runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

    job = create_map_job(self.config, self.internal_storage,
                         self.executor_id, job_id,
                         map_function=map_function,
                         iterdata=map_iterdata,
                         runtime_meta=runtime_meta,
                         runtime_memory=runtime_memory,
                         extra_env=extra_env,
                         include_modules=include_modules,
                         exclude_modules=exclude_modules,
                         execution_timeout=timeout,
                         extra_args=extra_args,
                         obj_chunk_size=chunk_size,
                         obj_chunk_number=chunk_n,
                         invoke_pool_threads=invoke_pool_threads)

    # Consistency fix: was `self.invoker.run(job)` — every other job
    # submission in this file goes through `run_job`. NOTE(review): if this
    # method targets an older invoker that only exposes `run`, revert.
    futures = self.invoker.run_job(job)
    self.futures.extend(futures)

    return futures
def call_async(self, func, data, extra_env=None, runtime_memory=None,
               timeout=None, include_modules=[], exclude_modules=[]):
    """
    Run one single function execution asynchronously.

    :param func: the function to map over the data
    :param data: input data
    :param extra_env: additional env variables for the action environment
    :param runtime_memory: memory to use to run the function
    :param timeout: time the function has to complete its execution
           before raising a timeout
    :param include_modules: explicitly pickle these dependencies
    :param exclude_modules: explicitly keep these modules out of the
           pickled dependencies

    :return: future object
    """
    self.last_call = 'call_async'
    job_id = self._create_job_id('A')

    runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

    # A single invocation is modelled as a map job over a 1-element list.
    job = create_map_job(self.config, self.internal_storage,
                         self.executor_id, job_id,
                         map_function=func,
                         iterdata=[data],
                         runtime_meta=runtime_meta,
                         runtime_memory=runtime_memory,
                         extra_env=extra_env,
                         include_modules=include_modules,
                         exclude_modules=exclude_modules,
                         execution_timeout=timeout)

    fs = self.invoker.run_job(job)
    self.futures.extend(fs)

    # Exactly one future was produced for the single input item.
    return fs[0]
def call_async(self, func: Callable, data: Union[List[Any], Tuple[Any, ...], Dict[str, Any]],
               extra_env: Optional[Dict] = None, runtime_memory: Optional[int] = None,
               timeout: Optional[int] = None, include_modules: Optional[List] = [],
               exclude_modules: Optional[List] = []) -> ResponseFuture:
    """
    Run one single function execution asynchronously.

    :param func: The function to map over the data.
    :param data: Input data. Arguments can be passed as a list or tuple,
           or as a dictionary for keyword arguments.
    :param extra_env: Additional env variables for the function environment.
    :param runtime_memory: Memory to use to run the function.
    :param timeout: Time that the function has to complete its execution
           before raising a timeout.
    :param include_modules: Explicitly pickle these dependencies.
    :param exclude_modules: Explicitly keep these modules from pickled
           dependencies.

    :return: Response future.
    """
    self.last_call = 'call_async'
    job_id = self._create_job_id('A')

    runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

    # A single invocation is modelled as a map job over a 1-element list.
    async_job = create_map_job(config=self.config,
                               internal_storage=self.internal_storage,
                               executor_id=self.executor_id,
                               job_id=job_id,
                               map_function=func,
                               iterdata=[data],
                               runtime_meta=runtime_meta,
                               runtime_memory=runtime_memory,
                               extra_env=extra_env,
                               include_modules=include_modules,
                               exclude_modules=exclude_modules,
                               execution_timeout=timeout)

    fs = self.invoker.run_job(async_job)
    self.futures.extend(fs)

    # Exactly one future was produced for the single input item.
    return fs[0]
def map_reduce(self, map_function, map_iterdata, reduce_function, chunksize=None,
               worker_processes=None, extra_args=None, extra_env=None,
               map_runtime_memory=None, obj_chunk_size=None, obj_chunk_number=None,
               reduce_runtime_memory=None, chunk_size=None, chunk_n=None,
               timeout=None, invoke_pool_threads=None, reducer_one_per_object=False,
               reducer_wait_local=False, include_modules=[], exclude_modules=[]):
    """
    Map map_function over the data, then apply reduce_function across
    all the resulting futures.

    :param map_function: the function to map over the data
    :param map_iterdata: an iterable of input data
    :param reduce_function: the function to reduce over the futures
    :param chunksize: split map_iterdata in chunks of this size;
           Lithops spawns 1 worker per resulting chunk. Default 1
    :param worker_processes: number of concurrent/parallel processes in
           each worker. Default 1
    :param extra_args: additional arguments to pass to function
           activation. Default None
    :param extra_env: additional environment variables for the action
           environment. Default None
    :param map_runtime_memory: memory to use to run the map function.
           Default None (loaded from config)
    :param obj_chunk_size: size of the data chunks to split each object.
           'None' to process the whole file in one function activation
    :param obj_chunk_number: number of chunks to split each object.
           'None' to process the whole file in one function activation
    :param reduce_runtime_memory: memory to use to run the reduce
           function. Default None (loaded from config)
    :param chunk_size: forwarded to the job factory alongside
           obj_chunk_size (presumably a legacy counterpart — confirm)
    :param chunk_n: forwarded to the job factory alongside
           obj_chunk_number (presumably a legacy counterpart — confirm)
    :param timeout: time the functions have to complete their execution
           before raising a timeout
    :param invoke_pool_threads: number of threads to use to invoke
    :param reducer_one_per_object: set one reducer per object after
           running the partitioner
    :param reducer_wait_local: wait for the map results locally before
           spawning the reduce stage
    :param include_modules: explicitly pickle these dependencies
    :param exclude_modules: explicitly keep these modules out of the
           pickled dependencies

    :return: a list with size `len(map_iterdata)` of futures
    """
    self.last_call = 'map_reduce'

    # --- map stage ---------------------------------------------------
    map_job_id = self._create_job_id('M')
    map_runtime_meta = self.invoker.select_runtime(map_job_id, map_runtime_memory)

    map_job = create_map_job(
        self.config, self.internal_storage, self.executor_id, map_job_id,
        map_function=map_function,
        iterdata=map_iterdata,
        chunksize=chunksize,
        worker_processes=worker_processes,
        runtime_meta=map_runtime_meta,
        runtime_memory=map_runtime_memory,
        extra_args=extra_args,
        extra_env=extra_env,
        chunk_size=chunk_size,
        chunk_n=chunk_n,
        obj_chunk_size=obj_chunk_size,
        obj_chunk_number=obj_chunk_number,
        include_modules=include_modules,
        exclude_modules=exclude_modules,
        execution_timeout=timeout,
        invoke_pool_threads=invoke_pool_threads)

    map_futures = self.invoker.run_job(map_job)
    self.futures.extend(map_futures)

    if reducer_wait_local:
        # Block here until the map stage is done before invoking reducers.
        wait(fs=map_futures,
             internal_storage=self.internal_storage,
             job_monitor=self.job_monitor)

    # --- reduce stage ------------------------------------------------
    reduce_job_id = map_job_id.replace('M', 'R')
    reduce_runtime_meta = self.invoker.select_runtime(reduce_job_id, reduce_runtime_memory)

    reduce_job = create_reduce_job(
        self.config, self.internal_storage, self.executor_id, reduce_job_id,
        reduce_function, map_job, map_futures,
        runtime_meta=reduce_runtime_meta,
        runtime_memory=reduce_runtime_memory,
        reducer_one_per_object=reducer_one_per_object,
        extra_env=extra_env,
        include_modules=include_modules,
        exclude_modules=exclude_modules)

    reduce_futures = self.invoker.run_job(reduce_job)
    self.futures.extend(reduce_futures)

    # The reducers consume the map outputs; stop the map futures from
    # producing output on their own.
    for mf in map_futures:
        mf._produce_output = False

    return map_futures + reduce_futures
def map_reduce(self, map_function: Callable, map_iterdata: List[Union[List[Any], Tuple[Any, ...], Dict[str, Any]]],
               reduce_function: Callable, chunksize: Optional[int] = None,
               extra_args: Optional[Union[List[Any], Tuple[Any, ...], Dict[str, Any]]] = None,
               extra_env: Optional[Dict[str, str]] = None,
               map_runtime_memory: Optional[int] = None,
               reduce_runtime_memory: Optional[int] = None,
               obj_chunk_size: Optional[int] = None,
               obj_chunk_number: Optional[int] = None,
               timeout: Optional[int] = None,
               reducer_one_per_object: Optional[bool] = False,
               spawn_reducer: Optional[int] = 20,
               include_modules: Optional[List[str]] = [],
               exclude_modules: Optional[List[str]] = []) -> FuturesList:
    """
    Map map_function over the data, then apply reduce_function across
    all the resulting futures.

    :param map_function: The function to map over the data
    :param map_iterdata: An iterable of input data
    :param reduce_function: The function to reduce over the futures
    :param chunksize: Split map_iterdata in chunks of this size.
           Lithops spawns 1 worker per resulting chunk. Default 1
    :param extra_args: Additional arguments to pass to function
           activation. Default None
    :param extra_env: Additional environment variables for action
           environment. Default None
    :param map_runtime_memory: Memory to use to run the map function.
           Default None (loaded from config)
    :param reduce_runtime_memory: Memory to use to run the reduce
           function. Default None (loaded from config)
    :param obj_chunk_size: the size of the data chunks to split each
           object. 'None' for processing the whole file in one function
           activation
    :param obj_chunk_number: Number of chunks to split each object.
           'None' for processing the whole file in one function activation
    :param timeout: Time that the functions have to complete their
           execution before raising a timeout
    :param reducer_one_per_object: Set one reducer per object after
           running the partitioner
    :param spawn_reducer: Percentage of done map functions before
           spawning the reduce function
    :param include_modules: Explicitly pickle these dependencies.
    :param exclude_modules: Explicitly keep these modules from pickled
           dependencies.

    :return: A list with size `len(map_iterdata)` of futures.
    """
    self.last_call = 'map_reduce'

    # --- map stage ---------------------------------------------------
    map_job_id = self._create_job_id('M')
    map_runtime_meta = self.invoker.select_runtime(map_job_id, map_runtime_memory)

    map_job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=map_job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             runtime_meta=map_runtime_meta,
                             runtime_memory=map_runtime_memory,
                             extra_args=extra_args,
                             extra_env=extra_env,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

    map_futures = self.invoker.run_job(map_job)
    self.futures.extend(map_futures)

    # When the input itself is a FuturesList, its futures feed this job
    # directly and must not produce their own output.
    if isinstance(map_iterdata, FuturesList):
        for in_fut in map_iterdata:
            in_fut._produce_output = False

    if spawn_reducer != ALWAYS:
        # Delay the reduce stage until the requested percentage of map
        # activations has completed.
        self.wait(map_futures, return_when=spawn_reducer)
        logger.debug(f'ExecutorID {self.executor_id} | JobID {map_job_id} - '
                     f'{spawn_reducer}% of map activations done. Spawning reduce stage')

    # --- reduce stage ------------------------------------------------
    reduce_job_id = map_job_id.replace('M', 'R')
    reduce_runtime_meta = self.invoker.select_runtime(reduce_job_id, reduce_runtime_memory)

    reduce_job = create_reduce_job(config=self.config,
                                   internal_storage=self.internal_storage,
                                   executor_id=self.executor_id,
                                   reduce_job_id=reduce_job_id,
                                   reduce_function=reduce_function,
                                   map_job=map_job,
                                   map_futures=map_futures,
                                   runtime_meta=reduce_runtime_meta,
                                   runtime_memory=reduce_runtime_memory,
                                   reducer_one_per_object=reducer_one_per_object,
                                   extra_env=extra_env,
                                   include_modules=include_modules,
                                   exclude_modules=exclude_modules)

    reduce_futures = self.invoker.run_job(reduce_job)
    self.futures.extend(reduce_futures)

    # The reducers consume the map outputs; stop the map futures from
    # producing output on their own.
    for mf in map_futures:
        mf._produce_output = False

    return create_futures_list(map_futures + reduce_futures, self)
def map(self, map_function: Callable, map_iterdata: List[Union[List[Any], Tuple[Any, ...], Dict[str, Any]]],
        chunksize: Optional[int] = None,
        extra_args: Optional[Union[List[Any], Tuple[Any, ...], Dict[str, Any]]] = None,
        extra_env: Optional[Dict[str, str]] = None,
        runtime_memory: Optional[int] = None,
        obj_chunk_size: Optional[int] = None,
        obj_chunk_number: Optional[int] = None,
        timeout: Optional[int] = None,
        include_modules: Optional[List[str]] = [],
        exclude_modules: Optional[List[str]] = []) -> FuturesList:
    """
    Spawn multiple function activations based on the items of an input list.

    :param map_function: The function to map over the data
    :param map_iterdata: An iterable of input data (e.g python list).
    :param chunksize: Split map_iterdata in chunks of this size.
           Lithops spawns 1 worker per resulting chunk
    :param extra_args: Additional arguments to pass to each map_function
           activation
    :param extra_env: Additional environment variables for function
           environment
    :param runtime_memory: Memory (in MB) to use to run the functions
    :param obj_chunk_size: Used for data processing. Chunk size to split
           each object in bytes. Must be >= 1MiB. 'None' for processing
           the whole file in one function activation
    :param obj_chunk_number: Used for data processing. Number of chunks
           to split each object. 'None' for processing the whole file in
           one function activation. chunk_n has prevalence over
           chunk_size if both parameters are set
    :param timeout: Max time per function activation (seconds)
    :param include_modules: Explicitly pickle these dependencies. All
           required dependencies are pickled if default empty list. No
           one dependency is pickled if it is explicitly set to None
    :param exclude_modules: Explicitly keep these modules from pickled
           dependencies. It is not taken into account if you set
           include_modules.

    :return: A list with size `len(map_iterdata)` of futures for each
           job (futures are also internally stored by Lithops).
    """
    self.last_call = 'map'
    job_id = self._create_job_id('M')

    runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

    map_job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number)

    new_futures = self.invoker.run_job(map_job)
    self.futures.extend(new_futures)

    # When the input itself is a FuturesList, its futures feed this job
    # directly and must not produce their own output.
    if isinstance(map_iterdata, FuturesList):
        for in_fut in map_iterdata:
            in_fut._produce_output = False

    return create_futures_list(new_futures, self)