def create_reduce_job(config, internal_storage, executor_id, reduce_job_id,
                      reduce_function, map_job, map_futures, runtime_meta,
                      reducer_one_per_object=False, runtime_memory=None,
                      extra_env=None, include_modules=[], exclude_modules=[],
                      execution_timeout=None):
    """
    Wrapper to create a reduce job. Applies a function across all map futures.
    """
    host_job_meta = {'host_job_create_tstamp': time.time()}
    iterdata = [[map_futures, ]]

    if 'parts_per_object' in map_job and reducer_one_per_object:
        # Group the flat list of futures so each reducer receives the
        # futures belonging to a single object
        prev_total_partitions = 0
        iterdata = []
        for total_partitions in map_job['parts_per_object']:
            iterdata.append([map_futures[prev_total_partitions:prev_total_partitions + total_partitions]])
            prev_total_partitions = prev_total_partitions + total_partitions

    reduce_job_env = {'__PW_REDUCE_JOB': True}
    if extra_env is None:
        ext_env = reduce_job_env
    else:
        ext_env = extra_env.copy()
        ext_env.update(reduce_job_env)

    iterdata = utils.verify_args(reduce_function, iterdata, None)

    return _create_job(config, internal_storage, executor_id,
                       reduce_job_id, reduce_function, iterdata,
                       runtime_meta=runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=ext_env,
                       include_modules=include_modules,
                       exclude_modules=exclude_modules,
                       execution_timeout=execution_timeout,
                       host_job_meta=host_job_meta)
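
# --- Illustrative sketch (not part of the original module) ------------------
# The reducer_one_per_object loop above slices the flat list of map futures
# into one group per original object, so the reduce function runs once per
# object instead of once globally. A self-contained demonstration with plain
# integers standing in for futures; the counts are made up for the example:

def _demo_group_futures_per_object():
    map_futures = list(range(9))      # nine hypothetical map futures
    parts_per_object = [2, 4, 3]      # partitions produced for each object
    iterdata, prev = [], 0
    for total in parts_per_object:
        iterdata.append([map_futures[prev:prev + total]])
        prev += total
    # One reduce input per object:
    assert iterdata == [[[0, 1]], [[2, 3, 4, 5]], [[6, 7, 8]]]
    return iterdata
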
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   chunksize=None, worker_processes=None, extra_args=None,
                   obj_chunk_size=None, obj_chunk_number=None,
                   chunk_size=None, chunk_n=None):
    """
    Wrapper to create a map job. It integrates COS logic to process objects.
    """
    if chunk_size or chunk_n:
        print('>> WARNING: chunk_size and chunk_n parameters are deprecated, '
              'use obj_chunk_size and obj_chunk_number instead')
        obj_chunk_size = chunk_size
        obj_chunk_number = chunk_n

    host_job_meta = {'host_job_create_tstamp': time.time()}
    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    # Object processing functionality
    ppo = None
    if utils.is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according to obj_chunk_size or obj_chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, ppo = create_partitions(config, internal_storage,
                                              map_iterdata, obj_chunk_size,
                                              obj_chunk_number)
        host_job_meta['host_job_create_partitions_time'] = round(time.time() - create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      chunksize=chunksize,
                      worker_processes=worker_processes,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta)

    if ppo:
        job.parts_per_object = ppo

    return job
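
# --- Illustrative sketch (not part of the original module) ------------------
# The chunk_size/chunk_n handling above is a deprecation shim: old parameter
# names are remapped to the new ones and a warning is emitted. A minimal
# standalone version of the same pattern, using the standard 'warnings'
# module instead of print(); the function name is hypothetical:

def _demo_deprecation_shim(obj_chunk_size=None, chunk_size=None):
    import warnings
    if chunk_size is not None:
        warnings.warn('chunk_size is deprecated, use obj_chunk_size instead',
                      DeprecationWarning, stacklevel=2)
        obj_chunk_size = chunk_size
    return obj_chunk_size
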
def create_reduce_job(config, internal_storage, executor_id, reduce_job_id,
                      reduce_function, map_job, map_futures, runtime_meta,
                      runtime_memory, reducer_one_per_object, extra_env,
                      include_modules, exclude_modules, execution_timeout=None):
    """
    Wrapper to create a reduce job. Applies a function across all map futures.
    """
    host_job_meta = {'host_job_create_tstamp': time.time()}
    iterdata = [(map_futures, )]

    if hasattr(map_job, 'parts_per_object') and reducer_one_per_object:
        # Group the flat list of futures so each reducer receives the
        # futures belonging to a single object
        prev_total_partitions = 0
        iterdata = []
        for total_partitions in map_job.parts_per_object:
            iterdata.append((map_futures[prev_total_partitions:prev_total_partitions + total_partitions], ))
            prev_total_partitions += total_partitions

    reduce_job_env = {'__LITHOPS_REDUCE_JOB': True}
    if extra_env is None:
        ext_env = reduce_job_env
    else:
        ext_env = extra_env.copy()
        ext_env.update(reduce_job_env)

    iterdata = utils.verify_args(reduce_function, iterdata, None)

    return _create_job(config=config,
                       internal_storage=internal_storage,
                       executor_id=executor_id,
                       job_id=reduce_job_id,
                       func=reduce_function,
                       iterdata=iterdata,
                       runtime_meta=runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=ext_env,
                       include_modules=include_modules,
                       exclude_modules=exclude_modules,
                       execution_timeout=execution_timeout,
                       host_job_meta=host_job_meta)
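
# --- Illustrative sketch (not part of the original module) ------------------
# The environment merge above copies the caller's extra_env and applies the
# reduce-job flag last, so '__LITHOPS_REDUCE_JOB' always wins on conflict.
# A self-contained demonstration of that merge order:

def _demo_env_merge(extra_env=None):
    reduce_job_env = {'__LITHOPS_REDUCE_JOB': True}
    if extra_env is None:
        ext_env = reduce_job_env
    else:
        ext_env = extra_env.copy()
        ext_env.update(reduce_job_env)
    return ext_env

# _demo_env_merge({'FOO': 'bar', '__LITHOPS_REDUCE_JOB': False})
# -> {'FOO': 'bar', '__LITHOPS_REDUCE_JOB': True}
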
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   extra_args=None, obj_chunk_size=None, obj_chunk_number=None,
                   invoke_pool_threads=128):
    """
    Wrapper to create a map job. It integrates COS logic to process objects.
    """
    host_job_meta = {'host_job_create_tstamp': time.time()}
    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    if config['lithops'].get('rabbitmq_monitor', False):
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        utils.create_rabbitmq_resources(rabbit_amqp_url, executor_id, job_id)

    # Object processing functionality
    parts_per_object = None
    if is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according to obj_chunk_size or obj_chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, parts_per_object = create_partitions(config, internal_storage,
                                                           map_iterdata, obj_chunk_size,
                                                           obj_chunk_number)
        host_job_meta['host_job_create_partitions_time'] = round(time.time() - create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta,
                      invoke_pool_threads=invoke_pool_threads)

    if parts_per_object:
        job.parts_per_object = parts_per_object

    return job
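
# --- Illustrative sketch (not part of the original module) ------------------
# These wrappers are normally reached indirectly through the public
# FunctionExecutor API rather than called directly. A hedged sketch of the
# object-processing flow that triggers create_partitions() above; the bucket
# and key are placeholders, and the exact API surface may differ by version:

def _demo_map_over_object_storage():
    import lithops

    def count_bytes(obj):
        # 'obj' is injected by the partitioner for object-processing functions
        return len(obj.data_stream.read())

    fexec = lithops.FunctionExecutor()
    # Split the object into 4 chunks -> 4 parallel activations
    fexec.map(count_bytes, 'cos://my-bucket/my-key', obj_chunk_number=4)
    return fexec.get_result()
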
def _fill_optional_args(self, function, data):
    """
    Fills in the reserved, optional parameters that may be declared
    in the function signature.
    """
    func_sig = inspect.signature(function)

    if len(data) == 1 and 'future' in data:
        # Function chaining feature
        out = [data.pop('future').result(internal_storage=self.internal_storage)]
        data.update(verify_args(function, out, None)[0])

    if 'ibm_cos' in func_sig.parameters:
        if 'ibm_cos' in self.lithops_config:
            if self.internal_storage.backend == 'ibm_cos':
                ibm_boto3_client = self.internal_storage.get_client()
            else:
                ibm_boto3_client = Storage(config=self.lithops_config,
                                           backend='ibm_cos').get_client()
            data['ibm_cos'] = ibm_boto3_client
        else:
            raise Exception('Cannot create the ibm_cos client: missing configuration')

    if 'storage' in func_sig.parameters:
        data['storage'] = self.internal_storage.storage

    if 'rabbitmq' in func_sig.parameters:
        if 'rabbitmq' in self.lithops_config:
            rabbit_amqp_url = self.lithops_config['rabbitmq'].get('amqp_url')
            params = pika.URLParameters(rabbit_amqp_url)
            connection = pika.BlockingConnection(params)
            data['rabbitmq'] = connection
        else:
            raise Exception('Cannot create the rabbitmq client: missing configuration')

    if 'id' in func_sig.parameters:
        data['id'] = int(self.job.call_id)
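
# --- Illustrative sketch (not part of the original module) ------------------
# _fill_optional_args() inspects the user function's signature and injects
# any reserved parameters it declares. A hypothetical user function opting
# into two of them, 'storage' and 'id'; both names come from the checks
# above, while the bucket and key layout are placeholders:

def _demo_user_function(x, storage, id):
    # 'storage' arrives as a ready-to-use Storage client and 'id' as the
    # integer call id, both filled in by _fill_optional_args()
    key = 'results/{}'.format(id)
    storage.put_object(bucket='my-bucket', key=key, body=str(x))
    return key
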