Example #1
0
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata,  runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   chunksize=None, worker_processes=None, extra_args=None,
                   obj_chunk_size=None, obj_chunk_number=None, chunk_size=None,
                   chunk_n=None):
    """
    Wrapper to create a map job.  It integrates COS logic to process objects.

    The input data is first normalized with ``utils.verify_args``. When
    ``utils.is_object_processing_function(map_function)`` is true, the data
    is replaced by the partitions returned from ``create_partitions`` and the
    time spent partitioning is recorded in the host job metadata. The job
    itself is built by ``_create_job``.

    :param config: configuration object, forwarded to ``create_partitions``
        and ``_create_job``.
    :param internal_storage: storage handle, forwarded to
        ``create_partitions`` and ``_create_job``.
    :param executor_id: executor identifier (used for logging and forwarded).
    :param job_id: job identifier (used for logging and forwarded).
    :param map_function: function to map; forwarded as ``func``.
    :param iterdata: input data for the map, validated together with
        ``extra_args`` by ``utils.verify_args``.
    :param runtime_meta: forwarded unchanged to ``_create_job``.
    :param runtime_memory: forwarded unchanged to ``_create_job``.
    :param extra_env: forwarded unchanged to ``_create_job``.
    :param include_modules: forwarded unchanged to ``_create_job``.
    :param exclude_modules: forwarded unchanged to ``_create_job``.
    :param execution_timeout: forwarded unchanged to ``_create_job``.
    :param chunksize: forwarded unchanged to ``_create_job``.
    :param worker_processes: forwarded unchanged to ``_create_job``.
    :param extra_args: extra arguments passed to ``utils.verify_args``.
    :param obj_chunk_size: chunk size handed to ``create_partitions``.
    :param obj_chunk_number: chunk number handed to ``create_partitions``.
    :param chunk_size: deprecated alias of ``obj_chunk_size``.
    :param chunk_n: deprecated alias of ``obj_chunk_number``.

    :return: the job object returned by ``_create_job``; its
        ``parts_per_object`` attribute is set when partitioning produced a
        truthy parts-per-object value.
    """

    if chunk_size or chunk_n:
        print('>> WARNING: chunk_size and chunk_n parameters are deprecated, '
              'use obj_chunk_size and obj_chunk_number instead')
        # Map each deprecated alias only when it was actually supplied, so
        # that passing just one of them does not reset the other
        # (explicitly given) obj_chunk_* argument to None.
        if chunk_size:
            obj_chunk_size = chunk_size
        if chunk_n:
            obj_chunk_number = chunk_n

    host_job_meta = {'host_job_create_tstamp': time.time()}
    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    # Object processing functionality
    ppo = None
    if utils.is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according chunk_size or chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, ppo = create_partitions(config, internal_storage,
                                              map_iterdata, obj_chunk_size,
                                              obj_chunk_number)

        # Elapsed partitioning time in seconds, rounded to microseconds
        host_job_meta['host_job_create_partitions_time'] = round(time.time()-create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      chunksize=chunksize,
                      worker_processes=worker_processes,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta)

    if ppo:
        job.parts_per_object = ppo

    return job
Example #2
0
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   extra_args=None,  obj_chunk_size=None, obj_chunk_number=None,
                   invoke_pool_threads=128):
    """
    Build a map job, partitioning the input first when required.

    Steps, in order:
      1. Record the creation timestamp and validate ``iterdata`` /
         ``extra_args`` through ``utils.verify_args``.
      2. If ``config['lithops']['rabbitmq_monitor']`` is truthy, call
         ``utils.create_rabbitmq_resources`` with the ``amqp_url`` read from
         ``config['rabbitmq']``.
      3. If ``is_object_processing_function(map_function)`` is true, replace
         the input data with the output of ``create_partitions`` and store
         the elapsed partitioning time in the host job metadata.
      4. Delegate to ``_create_job``; all remaining arguments are forwarded
         to it unchanged.

    :return: the job returned by ``_create_job``, with ``parts_per_object``
        set when partitioning yielded a truthy parts-per-object value.
    """
    job_meta = {'host_job_create_tstamp': time.time()}
    checked_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    monitor_enabled = config['lithops'].get('rabbitmq_monitor', False)
    if monitor_enabled:
        amqp_url = config['rabbitmq'].get('amqp_url')
        utils.create_rabbitmq_resources(amqp_url, executor_id, job_id)

    # Object storage partitioning (only for object processing functions)
    partitions_per_obj = None
    if is_object_processing_function(map_function):
        started = time.time()
        debug_msg = ('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow').format(executor_id, job_id)
        logger.debug(debug_msg)

        partitioned = create_partitions(config, internal_storage,
                                        checked_iterdata, obj_chunk_size,
                                        obj_chunk_number)
        checked_iterdata, partitions_per_obj = partitioned

        elapsed = time.time() - started
        job_meta['host_job_create_partitions_time'] = round(elapsed, 6)

    job_kwargs = {
        'config': config,
        'internal_storage': internal_storage,
        'executor_id': executor_id,
        'job_id': job_id,
        'func': map_function,
        'iterdata': checked_iterdata,
        'runtime_meta': runtime_meta,
        'runtime_memory': runtime_memory,
        'extra_env': extra_env,
        'include_modules': include_modules,
        'exclude_modules': exclude_modules,
        'execution_timeout': execution_timeout,
        'host_job_meta': job_meta,
        'invoke_pool_threads': invoke_pool_threads,
    }
    job = _create_job(**job_kwargs)

    if partitions_per_obj:
        job.parts_per_object = partitions_per_obj

    return job