Code example #1
File: job.py  Project: lchu-ibm/pywren-ibm-cloud
def create_map_job(config, internal_storage, executor_id, map_job_id, map_function, iterdata, runtime_meta,
                   runtime_memory=None, extra_params=None, extra_env=None, obj_chunk_size=None,
                   obj_chunk_number=None, remote_invocation=False, remote_invocation_groups=None,
                   invoke_pool_threads=128, include_modules=[], exclude_modules=[], is_remote_cluster=False,
                   execution_timeout=EXECUTION_TIMEOUT):
    """
    Wrapper to create a map job.  It integrates COS logic to process objects.
    """
    map_func = map_function
    map_iterdata = utils.verify_args(map_function, iterdata, extra_params)
    new_invoke_pool_threads = invoke_pool_threads
    new_runtime_memory = runtime_memory

    # Object processing functionality
    parts_per_object = None
    if utils.is_object_processing_function(map_function):
        # If it is an object processing function, create partitions according to chunk_size or chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions from object storage flow'.format(executor_id, map_job_id))
        map_iterdata, parts_per_object = create_partitions(config, map_iterdata, obj_chunk_size, obj_chunk_number)
    # ########

    # Remote invocation functionality
    original_total_tasks = len(map_iterdata)
    if original_total_tasks == 1 or is_remote_cluster:
        remote_invocation = False
    if remote_invocation:
        def remote_invoker(input_data):
            pw = pywren.ibm_cf_executor()
            return pw.map(map_function, input_data,
                          runtime_memory=runtime_memory,
                          invoke_pool_threads=invoke_pool_threads,
                          extra_env=extra_env)

        map_func = remote_invoker
        if remote_invocation_groups:
            map_iterdata = [[iterdata[x:x+remote_invocation_groups]]
                            for x in range(0, original_total_tasks, remote_invocation_groups)]
        else:
            map_iterdata = [iterdata]
        map_iterdata = utils.verify_args(remote_invoker, map_iterdata, extra_params)
        new_invoke_pool_threads = 1
        new_runtime_memory = runtime_memory
    # ########

    job_description = _create_job(config, internal_storage, executor_id,
                                  map_job_id, map_func, map_iterdata,
                                  runtime_meta=runtime_meta,
                                  runtime_memory=new_runtime_memory,
                                  extra_env=extra_env,
                                  invoke_pool_threads=new_invoke_pool_threads,
                                  include_modules=include_modules,
                                  exclude_modules=exclude_modules,
                                  remote_invocation=remote_invocation,
                                  original_total_tasks=original_total_tasks,
                                  execution_timeout=execution_timeout)

    job_description['parts_per_object'] = parts_per_object

    return job_description
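When remote_invocation is enabled, the grouping step is the least obvious part of the flow above. The following standalone sketch (hypothetical data, not part of the project) reproduces how iterdata is split when remote_invocation_groups is set:

# Standalone illustration of the remote-invocation grouping used above
# (hypothetical data; each inner list becomes the input of one remote invoker).
iterdata = list(range(10))
remote_invocation_groups = 4
original_total_tasks = len(iterdata)

map_iterdata = [[iterdata[x:x + remote_invocation_groups]]
                for x in range(0, original_total_tasks, remote_invocation_groups)]

print(map_iterdata)  # [[[0, 1, 2, 3]], [[4, 5, 6, 7]], [[8, 9]]]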
Code example #2
    def map(self, map_function, iterdata, obj_chunk_size=None, extra_env=None, extra_meta=None,
            remote_invocation=False, remote_invocation_groups=None, invoke_pool_threads=128,
            data_all_as_one=True, job_max_runtime=wrenconfig.RUNTIME_TIMEOUT,
            overwrite_invoke_args=None, exclude_modules=None):
        """
        Wrapper to launch map() method.  It integrates COS logic to process objects.
        """
        data = wrenutil.iterdata_as_list(iterdata)
        map_func = map_function
        map_iterdata = data
        new_invoke_pool_threads = invoke_pool_threads
        parts_per_object = None

        if wrenutil.is_object_processing(map_function):
            '''
            If it is an object processing function, create partitions according to chunk_size
            '''
            logger.debug("Calling map on partitions from object storage flow")
            arg_data = wrenutil.verify_args(map_function, data, object_processing=True)
            storage = COSBackend(self.config['ibm_cos'])
            map_iterdata, parts_per_object = create_partitions(arg_data, obj_chunk_size, storage)
            map_func = partition_processor(map_function)

        # Remote invocation functionality
        original_iterdata_len = len(iterdata)
        if original_iterdata_len > 1 and remote_invocation:
            runtime_name = self.runtime_name
            runtime_memory = self.runtime_memory
            rabbitmq_monitor = "PYWREN_RABBITMQ_MONITOR" in os.environ

            def remote_invoker(input_data):
                pw = pywren.ibm_cf_executor(runtime=runtime_name,
                                            runtime_memory=runtime_memory,
                                            rabbitmq_monitor=rabbitmq_monitor)
                return pw.map(map_function, input_data,
                              invoke_pool_threads=invoke_pool_threads,
                              extra_env=extra_env,
                              extra_meta=extra_meta)

            map_func = remote_invoker
            if remote_invocation_groups:
                map_iterdata = [[iterdata[x:x+remote_invocation_groups]]
                                for x in range(0, original_iterdata_len, remote_invocation_groups)]
            else:
                map_iterdata = [iterdata]
            new_invoke_pool_threads = 1

        map_futures = self._map(map_func, map_iterdata,
                                extra_env=extra_env,
                                extra_meta=extra_meta,
                                invoke_pool_threads=new_invoke_pool_threads,
                                data_all_as_one=data_all_as_one,
                                overwrite_invoke_args=overwrite_invoke_args,
                                exclude_modules=exclude_modules,
                                original_func_name=map_function.__name__,
                                remote_invocation=remote_invocation,
                                original_iterdata_len=original_iterdata_len,
                                job_max_runtime=job_max_runtime)

        return map_futures, parts_per_object
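parts_per_object is the piece of bookkeeping that later lets a reduce step regroup results by input object: one entry per object, holding how many partitions (and therefore map activations) that object produced. A rough standalone illustration with made-up sizes; the real create_partitions() works from COS object metadata and handles more cases:

import math

# Hypothetical object sizes and chunk size (not from the project).
obj_sizes = [10 * 1024**2, 64 * 1024**2, 3 * 1024**2]   # bytes
obj_chunk_size = 16 * 1024**2                            # 16 MiB

parts_per_object = [max(1, math.ceil(size / obj_chunk_size)) for size in obj_sizes]
print(parts_per_object)       # [1, 4, 1]
print(sum(parts_per_object))  # 6 map activations in total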
Code example #3
def create_call_async_job(config,
                          internal_storage,
                          executor_id,
                          async_job_id,
                          func,
                          data,
                          runtime_meta,
                          extra_env=None,
                          runtime_memory=None,
                          include_modules=[],
                          exclude_modules=[],
                          execution_timeout=EXECUTION_TIMEOUT):
    """
    Wrapper to create call_async job that contains only one function invocation.
    """
    data = utils.verify_args(func, [data], None)

    return _create_job(config,
                       internal_storage,
                       executor_id,
                       async_job_id,
                       func,
                       data,
                       runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=extra_env,
                       execution_timeout=execution_timeout,
                       exclude_modules=exclude_modules,
                       include_modules=include_modules)
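utils.verify_args is what turns the single data payload into keyword arguments matching func's signature. A simplified standalone approximation of that idea (the real helper also handles extra_params and richer error checking):

import inspect

def add(x, y):
    return x + y

def verify_args_sketch(func, data_items):
    # Normalise each payload into a dict of keyword arguments for func.
    param_names = list(inspect.signature(func).parameters)
    normalised = []
    for item in data_items:
        if isinstance(item, dict):
            normalised.append(item)
        elif isinstance(item, (list, tuple)):
            normalised.append(dict(zip(param_names, item)))
        else:
            normalised.append({param_names[0]: item})
    return normalised

print(verify_args_sketch(add, [{'x': 2, 'y': 3}]))  # [{'x': 2, 'y': 3}]
print(verify_args_sketch(add, [(4, 5)]))            # [{'x': 4, 'y': 5}]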
Code example #4
def create_reduce_job(config, internal_storage, executor_id, reduce_job_id, reduce_function,
                      map_job, map_futures, runtime_meta, reducer_one_per_object=False,
                      runtime_memory=None, extra_env=None, include_modules=[], exclude_modules=[],
                      execution_timeout=None):
    """
    Wrapper to create a reduce job. Apply a function across all map futures.
    """
    job_created_tstamp = time.time()
    iterdata = [[map_futures, ]]

    if 'parts_per_object' in map_job and reducer_one_per_object:
        prev_total_partitons = 0
        iterdata = []
        for total_partitions in map_job['parts_per_object']:
            iterdata.append([map_futures[prev_total_partitons:prev_total_partitons+total_partitions]])
            prev_total_partitons = prev_total_partitons + total_partitions

    reduce_job_env = {'__PW_REDUCE_JOB': True}
    if extra_env is None:
        ext_env = reduce_job_env
    else:
        ext_env = extra_env.copy()
        ext_env.update(reduce_job_env)

    iterdata = utils.verify_args(reduce_function, iterdata, None)

    return _create_job(config, internal_storage, executor_id,
                       reduce_job_id, reduce_function,
                       iterdata, runtime_meta=runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=ext_env,
                       include_modules=include_modules,
                       exclude_modules=exclude_modules,
                       execution_timeout=execution_timeout,
                       job_created_tstamp=job_created_tstamp)
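The reducer_one_per_object branch is easier to follow with concrete values. A standalone sketch (string stand-ins for real futures) of how the map futures are sliced so each reducer receives the futures of exactly one input object:

map_futures = ['f0', 'f1', 'f2', 'f3', 'f4', 'f5']  # stand-ins for real futures
parts_per_object = [3, 1, 2]                         # partitions produced per object

offset = 0
iterdata = []
for total_partitions in parts_per_object:
    iterdata.append([map_futures[offset:offset + total_partitions]])
    offset += total_partitions

print(iterdata)  # [[['f0', 'f1', 'f2']], [['f3']], [['f4', 'f5']]]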
Code example #5
def create_map_job(config,
                   internal_storage,
                   executor_id,
                   job_id,
                   map_function,
                   iterdata,
                   runtime_meta,
                   runtime_memory=None,
                   extra_params=None,
                   extra_env=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   invoke_pool_threads=128,
                   include_modules=[],
                   exclude_modules=[],
                   execution_timeout=None):
    """
    Wrapper to create a map job.  It integrates COS logic to process objects.
    """
    job_created_timestamp = time.time()
    map_func = map_function
    map_iterdata = utils.verify_args(map_function, iterdata, extra_params)
    new_invoke_pool_threads = invoke_pool_threads
    new_runtime_memory = runtime_memory

    if config['pywren'].get('rabbitmq_monitor', False):
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        utils.create_rabbitmq_resources(rabbit_amqp_url, executor_id, job_id)

    # Object processing functionality
    parts_per_object = None
    if is_object_processing_function(map_function):
        # If it is an object processing function, create partitions according to chunk_size or chunk_number
        logger.debug(
            'ExecutorID {} | JobID {} - Calling map on partitions from object storage flow'
            .format(executor_id, job_id))
        map_iterdata, parts_per_object = create_partitions(
            config, map_iterdata, obj_chunk_size, obj_chunk_number)
    # ########

    job_description = _create_job(config,
                                  internal_storage,
                                  executor_id,
                                  job_id,
                                  map_func,
                                  map_iterdata,
                                  runtime_meta=runtime_meta,
                                  runtime_memory=new_runtime_memory,
                                  extra_env=extra_env,
                                  invoke_pool_threads=new_invoke_pool_threads,
                                  include_modules=include_modules,
                                  exclude_modules=exclude_modules,
                                  execution_timeout=execution_timeout,
                                  job_created_timestamp=job_created_timestamp)

    if parts_per_object:
        job_description['parts_per_object'] = parts_per_object

    return job_description
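The RabbitMQ branch near the top only fires when monitoring is enabled in the configuration; the code reads 'rabbitmq_monitor' from the 'pywren' section and the AMQP URL from the 'rabbitmq' section. A hypothetical config fragment (placeholder URL) that would trigger it:

config = {
    'pywren': {'rabbitmq_monitor': True},
    'rabbitmq': {'amqp_url': 'amqp://user:password@rabbitmq-host:5672/vhost'},  # placeholder
}

if config['pywren'].get('rabbitmq_monitor', False):
    rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
    # utils.create_rabbitmq_resources(rabbit_amqp_url, executor_id, job_id) would run here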
Code example #6
def create_map_job(config,
                   internal_storage,
                   executor_id,
                   map_job_id,
                   map_function,
                   iterdata,
                   runtime_meta,
                   runtime_memory=None,
                   extra_params=None,
                   extra_env=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   invoke_pool_threads=128,
                   include_modules=[],
                   exclude_modules=[],
                   execution_timeout=EXECUTION_TIMEOUT):
    """
    Wrapper to create a map job.  It integrates COS logic to process objects.
    """
    map_func = map_function
    map_iterdata = utils.verify_args(map_function, iterdata, extra_params)
    new_invoke_pool_threads = invoke_pool_threads
    new_runtime_memory = runtime_memory

    # Object processing functionality
    parts_per_object = None
    if utils.is_object_processing_function(map_function):
        # If it is an object processing function, create partitions according to chunk_size or chunk_number
        logger.debug(
            'ExecutorID {} | JobID {} - Calling map on partitions from object storage flow'
            .format(executor_id, map_job_id))
        map_iterdata, parts_per_object = create_partitions(
            config, map_iterdata, obj_chunk_size, obj_chunk_number)
    # ########

    job_description = _create_job(config,
                                  internal_storage,
                                  executor_id,
                                  map_job_id,
                                  map_func,
                                  map_iterdata,
                                  runtime_meta=runtime_meta,
                                  runtime_memory=new_runtime_memory,
                                  extra_env=extra_env,
                                  invoke_pool_threads=new_invoke_pool_threads,
                                  include_modules=include_modules,
                                  exclude_modules=exclude_modules,
                                  execution_timeout=execution_timeout)

    job_description['parts_per_object'] = parts_per_object

    return job_description
Code example #7
File: job.py  Project: lchu-ibm/pywren-ibm-cloud
def create_reduce_job(config, internal_storage, executor_id, reduce_job_id, reduce_function,
                      map_job, map_futures, runtime_meta, reducer_one_per_object=False,
                      runtime_memory=None, extra_env=None, include_modules=[], exclude_modules=[]):
    """
    Wrapper to create a reduce job. Apply a function across all map futures.
    """
    iterdata = [[map_futures, ]]

    if map_job['parts_per_object'] and reducer_one_per_object:
        prev_total_partitons = 0
        iterdata = []
        for total_partitions in map_job['parts_per_object']:
            iterdata.append([map_futures[prev_total_partitons:prev_total_partitons+total_partitions]])
            prev_total_partitons = prev_total_partitons + total_partitions

    def reduce_function_wrapper(fut_list, internal_storage, ibm_cos):
        logger.info('Waiting for results')
        if 'SHOW_MEMORY_USAGE' in os.environ:
            show_memory = eval(os.environ['SHOW_MEMORY_USAGE'])
        else:
            show_memory = False
        # Wait for all results
        wait_storage(fut_list, internal_storage, download_results=True)
        results = [f.result() for f in fut_list if f.done and not f.futures]
        fut_list.clear()
        reduce_func_args = {'results': results}

        if show_memory:
            logger.debug("Memory usage after getting the results: {}".format(utils.get_current_memory_usage()))

        # Run reduce function
        func_sig = inspect.signature(reduce_function)
        if 'ibm_cos' in func_sig.parameters:
            reduce_func_args['ibm_cos'] = ibm_cos
        if 'internal_storage' in func_sig.parameters:
            reduce_func_args['internal_storage'] = internal_storage

        return reduce_function(**reduce_func_args)

    iterdata = utils.verify_args(reduce_function_wrapper, iterdata, None)

    return _create_job(config, internal_storage, executor_id,
                       reduce_job_id, reduce_function_wrapper,
                       iterdata, runtime_meta=runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=extra_env,
                       include_modules=include_modules,
                       exclude_modules=exclude_modules,
                       original_func_name=reduce_function.__name__)
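The signature inspection inside reduce_function_wrapper lets a user-supplied reducer opt in to extra arguments simply by declaring them. A standalone sketch of that pattern (placeholder objects stand in for the real COS client and storage handle):

import inspect

def my_reduce(results, internal_storage=None):
    return sum(results)

reduce_func_args = {'results': [1, 2, 3]}
func_sig = inspect.signature(my_reduce)
if 'ibm_cos' in func_sig.parameters:
    reduce_func_args['ibm_cos'] = object()            # placeholder COS client
if 'internal_storage' in func_sig.parameters:
    reduce_func_args['internal_storage'] = object()   # placeholder storage handle

print(my_reduce(**reduce_func_args))  # 6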
Code example #8
    def _map(self, func, iterdata, extra_env=None, extra_meta=None, invoke_pool_threads=128,
             data_all_as_one=True, overwrite_invoke_args=None, exclude_modules=None,
             original_func_name=None, remote_invocation=False, original_iterdata_len=None,
             job_max_runtime=wrenconfig.RUNTIME_TIMEOUT):
        """
        :param func: the function to map over the data
        :param iterdata: An iterable of input data
        :param extra_env: Additional environment variables for CF environment. Default None.
        :param extra_meta: Additional metadata to pass to CF. Default None.
        :param remote_invocation: Enable remote invocation. Default False.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param data_all_as_one: upload the data as a single object. Default True
        :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
        :param exclude_modules: Explicitly exclude these modules from the pickled dependencies.
        :param original_func_name: Name of the function to invoke.
        :return: A list with size `len(iterdata)` of futures for each job
        :rtype:  list of futures.
        """
        if original_func_name:
            func_name = original_func_name
        else:
            func_name = func.__name__

        data = wrenutil.iterdata_as_list(iterdata)

        if extra_env is not None:
            extra_env = wrenutil.convert_bools_to_string(extra_env)

        if not data:
            return []

        if self.map_item_limit is not None and len(data) > self.map_item_limit:
            raise ValueError("len(data) = {}, exceeding map item limit of {}; "
                             "consider mapping over a smaller "
                             "number of items".format(len(data),
                                                      self.map_item_limit))

        # This allows multiple parameters in functions
        data = wrenutil.verify_args(func, data)

        callgroup_id = wrenutil.create_callgroup_id()

        host_job_meta = {}

        log_msg = 'Executor ID {} Serializing function and data'.format(self.executor_id)
        logger.debug(log_msg)
        # pickle func and all data (to capture module dependencies)
        func_and_data_ser, mod_paths = self.serializer([func] + data)

        func_str = func_and_data_ser[0]
        data_strs = func_and_data_ser[1:]
        data_size_bytes = sum(len(x) for x in data_strs)

        agg_data_key = None
        host_job_meta['agg_data'] = False
        host_job_meta['data_size_bytes'] = data_size_bytes

        log_msg = 'Executor ID {} Uploading function and data'.format(self.executor_id)
        logger.info(log_msg)
        if not self.log_level:
            print(log_msg, end=' ')

        if data_size_bytes < wrenconfig.MAX_AGG_DATA_SIZE and data_all_as_one:
            agg_data_key = create_agg_data_key(self.internal_storage.prefix, self.executor_id, callgroup_id)
            agg_data_bytes, agg_data_ranges = self.agg_data(data_strs)
            agg_upload_time = time.time()
            self.internal_storage.put_data(agg_data_key, agg_data_bytes)
            host_job_meta['agg_data'] = True
            host_job_meta['data_upload_time'] = time.time() - agg_upload_time
            host_job_meta['data_upload_timestamp'] = time.time()
        else:
            log_msg = ('Executor ID {} Total data exceeded '
                       'maximum size of {} bytes'.format(self.executor_id,
                                                         wrenconfig.MAX_AGG_DATA_SIZE))
            logger.warning(log_msg)

        if exclude_modules:
            for module in exclude_modules:
                for mod_path in list(mod_paths):
                    if module in mod_path and mod_path in mod_paths:
                        mod_paths.remove(mod_path)

        module_data = create_mod_data(mod_paths)
        # Create func and upload
        func_module_str = pickle.dumps({'func': func_str, 'module_data': module_data}, -1)
        host_job_meta['func_module_bytes'] = len(func_module_str)

        func_upload_time = time.time()
        func_key = create_func_key(self.internal_storage.prefix, self.executor_id, callgroup_id)
        self.internal_storage.put_func(func_key, func_module_str)
        host_job_meta['func_upload_time'] = time.time() - func_upload_time
        host_job_meta['func_upload_timestamp'] = time.time()

        if not self.log_level:
            func_and_data_size = wrenutil.sizeof_fmt(host_job_meta['func_module_bytes']+host_job_meta['data_size_bytes'])
            log_msg = '- Total: {}'.format(func_and_data_size)
            print(log_msg)

        def invoke(data_str, executor_id, callgroup_id, call_id, func_key,
                   host_job_meta, agg_data_key=None, data_byte_range=None):
            data_key, output_key, status_key = create_keys(self.internal_storage.prefix,
                                                           executor_id, callgroup_id, call_id)
            host_job_meta['job_invoke_timestamp'] = time.time()

            if agg_data_key is None:
                data_upload_time = time.time()
                self.internal_storage.put_data(data_key, data_str)
                data_upload_time = time.time() - data_upload_time
                host_job_meta['data_upload_time'] = data_upload_time
                host_job_meta['data_upload_timestamp'] = time.time()
            else:
                data_key = agg_data_key

            return self.invoke_with_keys(func_key, data_key,
                                         output_key, status_key,
                                         executor_id, callgroup_id,
                                         call_id, extra_env,
                                         extra_meta, data_byte_range,
                                         host_job_meta.copy(),
                                         job_max_runtime,
                                         overwrite_invoke_args=overwrite_invoke_args)

        N = len(data)
        call_futures = []
        if remote_invocation and original_iterdata_len > 1:
            log_msg = ('Executor ID {} Starting {} remote invocation function: '
                       'Spawning {}() - Total: {} activations'
                       .format(self.executor_id, N, func_name, original_iterdata_len))
        else:
            log_msg = 'Executor ID {} Starting function invocation: {}() - Total: {} activations'.format(self.executor_id, func_name, N)
        logger.info(log_msg)
        if not self.log_level:
            print(log_msg)

        with ThreadPoolExecutor(max_workers=invoke_pool_threads) as executor:
            for i in range(N):
                call_id = "{:05d}".format(i)

                data_byte_range = None
                if agg_data_key is not None:
                    data_byte_range = agg_data_ranges[i]

                future = executor.submit(invoke, data_strs[i], self.executor_id,
                                         callgroup_id, call_id, func_key,
                                         host_job_meta.copy(),
                                         agg_data_key,
                                         data_byte_range)

                call_futures.append(future)

        res = [ft.result() for ft in call_futures]

        return res
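The data_all_as_one path uploads every serialized payload as one aggregated object and later hands each activation only its own byte range. A standalone sketch of that idea with toy byte strings (the real agg_data/create_agg_data_key helpers also build the storage keys):

data_strs = [b'alpha', b'bravo!', b'charlie']  # toy serialized payloads

def agg_data_sketch(data_strs):
    ranges, pos = [], 0
    for ds in data_strs:
        ranges.append((pos, pos + len(ds) - 1))  # inclusive byte range of this payload
        pos += len(ds)
    return b''.join(data_strs), ranges

agg_data_bytes, agg_data_ranges = agg_data_sketch(data_strs)
start, end = agg_data_ranges[1]
print(agg_data_bytes[start:end + 1])  # b'bravo!' -- what call 00001 would read back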
Code example #9
def create_map_job(config,
                   internal_storage,
                   executor_id,
                   job_id,
                   map_function,
                   iterdata,
                   obj_chunk_size=None,
                   extra_env=None,
                   extra_meta=None,
                   runtime_memory=None,
                   remote_invocation=False,
                   remote_invocation_groups=None,
                   invoke_pool_threads=128,
                   exclude_modules=None,
                   is_cf_cluster=False,
                   execution_timeout=EXECUTION_TIMEOUT,
                   overwrite_invoke_args=None):
    """
    Wrapper to create a map job.  It integrates COS logic to process objects.
    """
    map_job_id = f'M{job_id}'
    data = utils.iterdata_as_list(iterdata)
    map_func = map_function
    map_iterdata = data
    new_invoke_pool_threads = invoke_pool_threads
    new_runtime_memory = runtime_memory

    # Object processing functionality
    parts_per_object = None
    if utils.is_object_processing_function(map_function):
        '''
        If it is an object processing function, create partitions according to chunk_size
        '''
        logger.debug(
            'ExecutorID {} | JobID {} - Calling map on partitions from object storage flow'
            .format(executor_id, job_id))
        arg_data = utils.verify_args(map_function,
                                     data,
                                     object_processing=True)
        map_iterdata, parts_per_object = create_partitions(
            config, arg_data, obj_chunk_size)
        map_func = partition_processor(map_function)
    # ########

    # Remote invocation functionality
    original_total_tasks = len(map_iterdata)
    if original_total_tasks == 1 or is_cf_cluster:
        remote_invocation = False
    if remote_invocation:
        rabbitmq_monitor = "CB_RABBITMQ_MONITOR" in os.environ

        def remote_invoker(input_data):
            pw = pywren.ibm_cf_executor(rabbitmq_monitor=rabbitmq_monitor)
            return pw.map(map_function,
                          input_data,
                          runtime_memory=runtime_memory,
                          invoke_pool_threads=invoke_pool_threads,
                          extra_env=extra_env,
                          extra_meta=extra_meta)

        map_func = remote_invoker
        if remote_invocation_groups:
            map_iterdata = [[iterdata[x:x + remote_invocation_groups]]
                            for x in range(0, original_total_tasks, remote_invocation_groups)]
        else:
            map_iterdata = [iterdata]
        new_invoke_pool_threads = 1
        new_runtime_memory = runtime_memory
    # ########

    job_description = _create_job(config,
                                  internal_storage,
                                  executor_id,
                                  map_job_id,
                                  map_func,
                                  map_iterdata,
                                  extra_env=extra_env,
                                  extra_meta=extra_meta,
                                  runtime_memory=new_runtime_memory,
                                  invoke_pool_threads=new_invoke_pool_threads,
                                  overwrite_invoke_args=overwrite_invoke_args,
                                  exclude_modules=exclude_modules,
                                  original_func_name=map_function.__name__,
                                  remote_invocation=remote_invocation,
                                  original_total_tasks=original_total_tasks,
                                  execution_timeout=execution_timeout)

    return job_description, parts_per_object
Code example #10
def _create_job(config,
                internal_storage,
                executor_id,
                job_id,
                func,
                iterdata,
                extra_env=None,
                extra_meta=None,
                runtime_memory=None,
                invoke_pool_threads=128,
                overwrite_invoke_args=None,
                exclude_modules=None,
                original_func_name=None,
                remote_invocation=False,
                original_total_tasks=None,
                execution_timeout=EXECUTION_TIMEOUT):
    """
    :param func: the function to map over the data
    :param iterdata: An iterable of input data
    :param extra_env: Additional environment variables for CF environment. Default None.
    :param extra_meta: Additional metadata to pass to CF. Default None.
    :param remote_invocation: Enable remote invocation. Default False.
    :param invoke_pool_threads: Number of threads to use to invoke.
    :param data_all_as_one: upload the data as a single object. Default True
    :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
    :param exclude_modules: Explicitly exclude these modules from the pickled dependencies.
    :param original_func_name: Name of the function to invoke.
    :return: A dict describing the job (runtime, storage keys, data ranges, metadata).
    :rtype:  dict
    """
    log_level = os.getenv('CB_LOG_LEVEL')

    runtime_name = config['pywren']['runtime']
    if runtime_memory is None:
        runtime_memory = config['pywren']['runtime_memory']
    runtime_memory = int(runtime_memory)
    runtime_preinstalls = select_runtime(config, internal_storage, executor_id,
                                         job_id, runtime_name, runtime_memory)
    serializer = SerializeIndependent(runtime_preinstalls)

    if original_func_name:
        func_name = original_func_name
    else:
        func_name = func.__name__

    data = utils.iterdata_as_list(iterdata)

    if extra_env is not None:
        extra_env = utils.convert_bools_to_string(extra_env)

    if not data:
        return []

    # This allows multiple parameters in functions
    data = utils.verify_args(func, data)

    host_job_meta = {}
    job_description = {}

    job_description['runtime_name'] = runtime_name
    job_description['runtime_memory'] = runtime_memory
    job_description['task_execution_timeout'] = execution_timeout
    job_description['func_name'] = func_name
    job_description['extra_env'] = extra_env
    job_description['extra_meta'] = extra_meta
    job_description['total_calls'] = len(data)
    job_description['invoke_pool_threads'] = invoke_pool_threads
    job_description['overwrite_invoke_args'] = overwrite_invoke_args
    job_description['job_id'] = job_id
    job_description['remote_invocation'] = remote_invocation
    job_description['original_total_calls'] = original_total_tasks

    log_msg = 'ExecutorID {} | JobID {} - Serializing function and data'.format(
        executor_id, job_id)
    logger.debug(log_msg)
    # pickle func and all data (to capture module dependencies)
    func_and_data_ser, mod_paths = serializer([func] + data)

    func_str = func_and_data_ser[0]
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)

    host_job_meta['agg_data'] = False
    host_job_meta['data_size_bytes'] = data_size_bytes

    log_msg = 'ExecutorID {} | JobID {} - Uploading function and data'.format(
        executor_id, job_id)
    logger.info(log_msg)
    if not log_level:
        print(log_msg, end=' ')

    if data_size_bytes < MAX_AGG_DATA_SIZE:
        agg_data_key = create_agg_data_key(internal_storage.prefix,
                                           executor_id, job_id)
        job_description['data_key'] = agg_data_key
        agg_data_bytes, agg_data_ranges = _agg_data(data_strs)
        job_description['data_ranges'] = agg_data_ranges
        agg_upload_time = time.time()
        internal_storage.put_data(agg_data_key, agg_data_bytes)
        host_job_meta['agg_data'] = True
        host_job_meta['data_upload_time'] = time.time() - agg_upload_time
        host_job_meta['data_upload_timestamp'] = time.time()
    else:
        log_msg = ('ExecutorID {} | JobID {} - Total data exceeded '
                   'maximum size of {} bytes'.format(executor_id, job_id,
                                                     MAX_AGG_DATA_SIZE))
        raise Exception(log_msg)

    if exclude_modules:
        for module in exclude_modules:
            for mod_path in list(mod_paths):
                if module in mod_path and mod_path in mod_paths:
                    mod_paths.remove(mod_path)

    module_data = create_module_data(mod_paths)
    # Create func and upload
    host_job_meta['func_name'] = func_name
    func_module_str = pickle.dumps(
        {
            'func': func_str,
            'module_data': module_data
        }, -1)
    host_job_meta['func_module_bytes'] = len(func_module_str)

    func_upload_time = time.time()
    func_key = create_func_key(internal_storage.prefix, executor_id, job_id)
    job_description['func_key'] = func_key
    internal_storage.put_func(func_key, func_module_str)
    host_job_meta['func_upload_time'] = time.time() - func_upload_time
    host_job_meta['func_upload_timestamp'] = time.time()

    if not log_level:
        func_and_data_size = utils.sizeof_fmt(
            host_job_meta['func_module_bytes'] +
            host_job_meta['data_size_bytes'])
        log_msg = '- Total: {}'.format(func_and_data_size)
        print(log_msg)

    job_description['host_job_meta'] = host_job_meta

    return job_description
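The exclude_modules loop simply prunes dependency paths before they are pickled alongside the function. A standalone sketch with hypothetical paths:

# Hypothetical dependency paths; same filtering as in _create_job above.
mod_paths = {'/tmp/deps/numpy/__init__.py',
             '/tmp/deps/mymodule.py',
             '/tmp/deps/pandas/core/frame.py'}
exclude_modules = ['numpy', 'pandas']

# Drop any dependency path that mentions an excluded module name.
for module in exclude_modules:
    for mod_path in list(mod_paths):
        if module in mod_path and mod_path in mod_paths:
            mod_paths.remove(mod_path)

print(mod_paths)  # {'/tmp/deps/mymodule.py'}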