Ejemplo n.º 1
0
    def _send_status_rabbitmq(self):
        """
        Send the status event to RabbitMQ
        """
        dmpd_response_status = json.dumps(self.response)
        drs = sizeof_fmt(len(dmpd_response_status))

        executor_id = self.response['executor_id']
        job_id = self.response['job_id']

        rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
        status_sent = False
        output_query_count = 0
        params = pika.URLParameters(rabbit_amqp_url)
        exchange = 'lithops-{}-{}'.format(executor_id, job_id)

        while not status_sent and output_query_count < 5:
            output_query_count = output_query_count + 1
            try:
                connection = pika.BlockingConnection(params)
                channel = connection.channel()
                channel.exchange_declare(exchange=exchange, exchange_type='fanout', auto_delete=True)
                channel.basic_publish(exchange=exchange, routing_key='',
                                      body=dmpd_response_status)
                connection.close()
                logger.info("Execution status sent to rabbitmq - Size: {}".format(drs))
                status_sent = True
            except Exception as e:
                logger.error("Unable to send status to rabbitmq")
                logger.error(str(e))
                logger.info('Retrying to send status to rabbitmq...')
                time.sleep(0.2)
Ejemplo n.º 2
0
def list_bucket(prefix, bucket, backend, debug, config):
    if config:
        config = load_yaml_config(config)
    log_level = logging.INFO if not debug else logging.DEBUG
    setup_lithops_logger(log_level)
    storage = Storage(config=config, backend=backend)
    logger.info('Listing objects in bucket {}'.format(bucket))
    objects = storage.list_objects(bucket, prefix=prefix)

    if objects:
        width = max([len(obj['Key']) for obj in objects])

        print('\n{:{width}} \t {} \t\t {:>9}'.format('Key', 'Last modified', 'Size', width=width))
        print('-' * width, '\t', '-' * 20, '\t', '-' * 9)
        for obj in objects:
            key = obj['Key']
            date = obj['LastModified'].strftime("%b %d %Y %H:%M:%S")
            size = sizeof_fmt(obj['Size'])
            print('{:{width}} \t {} \t {:>9}'.format(key, date, size, width=width))
        print()
        print('Total objects: {}'.format(len(objects)))
    else:
        width = 10
        print('\n{:{width}} \t {} \t\t {:>9}'.format('Key', 'Last modified', 'Size', width=width))
        print('-' * width, '\t', '-' * 20, '\t', '-' * 9)
        print('\nThe bucket is empty')
Ejemplo n.º 3
0
 def put_object(self, bucket_name, key, data):
     """
     Put an object in COS. Override the object if the key already exists.
     :param key: key of the object.
     :param data: data of the object
     :type data: str/bytes
     :return: None
     """
     retries = 0
     status = None
     while status is None:
         try:
             res = self.s3_client.put_object(Bucket=bucket_name,
                                             Key=key,
                                             Body=data)
             status = 'OK' if res['ResponseMetadata'][
                 'HTTPStatusCode'] == 200 else 'Error'
             try:
                 logger.debug('PUT Object {} - Size: {} - {}'.format(
                     key, sizeof_fmt(len(data)), status))
             except Exception:
                 logger.debug('PUT Object {} {}'.format(key, status))
         except botocore.exceptions.ClientError as e:
             if e.response['Error']['Code'] == "NoSuchKey":
                 raise StorageNoSuchKeyError(bucket_name, key)
             else:
                 raise e
         except botocore.exceptions.ReadTimeoutError as e:
             if retries == OBJ_REQ_RETRIES:
                 raise e
             logger.debug('PUT Object timeout. Retrying request')
             retries += 1
     return True
Ejemplo n.º 4
0
    def _send(self):
        """
        Send the status event to RabbitMQ
        """
        dmpd_response_status = json.dumps(self.status)
        drs = sizeof_fmt(len(dmpd_response_status))

        status_sent = False
        output_query_count = 0

        queues = []
        executor_keys = self.job.executor_id.split('-')
        for k in range(int(len(executor_keys)/2)):
            qname = 'lithops-{}'.format('-'.join(executor_keys[0:k*3+2]))
            queues.append(qname)

        while not status_sent and output_query_count < 5:
            output_query_count = output_query_count + 1
            try:
                with self._create_channel() as ch:
                    for queue in queues:
                        ch.basic_publish(exchange='', routing_key=queue, body=dmpd_response_status)
                logger.info("Execution status sent to RabbitMQ - Size: {}".format(drs))
                status_sent = True
            except Exception:
                time.sleep(0.2)

        if self.status['type'] == '__end__':
            super()._send()
Ejemplo n.º 5
0
def _split_objects_from_buckets(map_func_args_list, keys_dict, chunk_size,
                                chunk_number):
    """
    Create partitions from bucket/s
    """
    partitions = []
    parts_per_object = []

    if chunk_number:
        logger.debug('Chunk size set to {}'.format(chunk_size))
    elif chunk_size:
        logger.debug('Chunk number set to {}'.format(chunk_number))
    else:
        logger.debug('Chunk size and chunk number not set ')

    for entry in map_func_args_list:
        # Each entry is a bucket
        sb, bucket, prefix, obj_name = utils.split_object_url(entry['obj'])

        for key, obj_size in keys_dict[bucket].items():
            if prefix in key and obj_size > 0:

                if chunk_number:
                    chunk_rest = obj_size % chunk_number
                    obj_chunk_size = (obj_size // chunk_number) + \
                        round((chunk_rest / chunk_number) + 0.5)
                elif chunk_size:
                    obj_chunk_size = chunk_size
                else:
                    obj_chunk_size = obj_size

                size = total_partitions = 0

                ci = obj_size
                cz = obj_chunk_size
                parts = ci // cz + (ci % cz > 0)
                logger.debug(
                    'Creating {} partitions from object {} ({})'.format(
                        parts, key, sizeof_fmt(obj_size)))

                while size < obj_size:
                    brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
                    brange = None if obj_size == obj_chunk_size else brange

                    partition = entry.copy()
                    partition['obj'] = CloudObject(sb, bucket, key)
                    partition['obj'].data_byte_range = brange
                    partition['obj'].chunk_size = obj_chunk_size
                    partition['obj'].part = total_partitions
                    partitions.append(partition)

                    total_partitions += 1
                    size += obj_chunk_size

                parts_per_object.append(total_partitions)

    return partitions, parts_per_object
Ejemplo n.º 6
0
def list_bucket(bucket, backend, debug):
    log_level = logging.INFO if not debug else logging.DEBUG
    setup_lithops_logger(log_level)
    storage = Storage(backend=backend)
    logger.info('Listing objects in bucket {}'.format(bucket))
    objects = storage.list_objects(bucket)

    width = max([len(obj['Key']) for obj in objects])

    print('\n{:{width}} \t {} \t\t {:>9}'.format('Key', 'Last modified', 'Size', width=width))
    print('-' * width, '\t', '-' * 20, '\t', '-' * 9)
    for obj in objects:
        key = obj['Key']
        date = obj['LastModified'].strftime("%b %d %Y %H:%M:%S")
        size = sizeof_fmt(obj['Size'])
        print('{:{width}} \t {} \t {:>9}'.format(key, date, size, width=width))
    print()
Ejemplo n.º 7
0
    def _split(entry):
        obj_size = None
        object_url = entry['url']
        metadata = requests.head(object_url)

        if 'content-length' in metadata.headers:
            obj_size = int(metadata.headers['content-length'])

        if chunk_number and obj_size:
            chunk_rest = obj_size % chunk_number
            obj_chunk_size = (obj_size // chunk_number) + \
                round((chunk_rest / chunk_number) + 0.5)
        elif chunk_size and obj_size:
            obj_chunk_size = chunk_size
        elif obj_size:
            obj_chunk_size = obj_size
        else:
            obj_chunk_size = obj_size = 1

        if 'accept-ranges' not in metadata.headers:
            obj_chunk_size = obj_size

        size = total_partitions = 0

        ci = obj_size
        cz = obj_chunk_size
        parts = ci // cz + (ci % cz > 0)
        logger.debug('Creating {} partitions from object {} ({})'.format(
            parts, object_url, sizeof_fmt(obj_size)))

        while size < obj_size:
            brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
            brange = None if obj_size == obj_chunk_size else brange

            partition = entry.copy()
            partition['url'] = CloudObjectUrl(object_url)
            partition['url'].data_byte_range = brange
            partition['url'].chunk_size = obj_chunk_size
            partition['url'].part = total_partitions
            partitions.append(partition)

            total_partitions += 1
            size += obj_chunk_size

        parts_per_object.append(total_partitions)
Ejemplo n.º 8
0
 def put_object(self, container_name, key, data):
     """
     Put an object in Swift. Override the object if the key already exists.
     :param key: key of the object.
     :param data: data of the object
     :type data: str/bytes
     :return: None
     """
     url = '/'.join([self.endpoint, container_name, key])
     try:
         res = self.session.put(url, data=data)
         status = 'OK' if res.status_code == 201 else 'Error'
         try:
             logger.debug('PUT Object {} - Size: {} - {}'.format(key, sizeof_fmt(len(data)), status))
         except Exception:
             logger.debug('PUT Object {} - {}'.format(key, status))
     except Exception as e:
         print(e)
Ejemplo n.º 9
0
def runtime_packages(storage_config):
    logger.info("Extracting preinstalled Python modules...")
    internal_storage = InternalStorage(storage_config)

    runtime_meta = dict()
    mods = list(pkgutil.iter_modules())
    runtime_meta['preinstalls'] = [entry for entry in sorted([[mod, is_pkg] for _, mod, is_pkg in mods])]
    python_version = sys.version_info
    runtime_meta['python_ver'] = str(python_version[0])+"."+str(python_version[1])
    
    activation_id = storage_config['activation_id']

    status_key = create_runtime_meta_key(JOBS_PREFIX, activation_id)
    logger.debug("Runtime metadata key {}".format(status_key))
    dmpd_response_status = json.dumps(runtime_meta)
    drs = sizeof_fmt(len(dmpd_response_status))
    logger.info("Storing execution stats - Size: {}".format(drs))
    internal_storage.put_data(status_key, dmpd_response_status)
Ejemplo n.º 10
0
    def _send_status_os(self):
        """
        Send the status event to the Object Storage
        """
        executor_id = self.response['executor_id']
        job_id = self.response['job_id']
        call_id = self.response['call_id']
        act_id = self.response['activation_id']

        if self.response['type'] == '__init__':
            init_key = create_init_key(JOBS_PREFIX, executor_id, job_id, call_id, act_id)
            self.internal_storage.put_data(init_key, '')

        elif self.response['type'] == '__end__':
            status_key = create_status_key(JOBS_PREFIX, executor_id, job_id, call_id)
            dmpd_response_status = json.dumps(self.response)
            drs = sizeof_fmt(len(dmpd_response_status))
            logger.info("Storing execution stats - Size: {}".format(drs))
            self.internal_storage.put_data(status_key, dmpd_response_status)
Ejemplo n.º 11
0
def get_memory_usage(formatted=True):
    """
    Gets the current memory usage of the runtime.
    To be used only in the action code.
    """
    if not is_unix_system():
        return
    split_args = False
    pids_to_show = None
    discriminate_by_pid = False

    ps_mem.verify_environment(pids_to_show)
    sorted_cmds, shareds, count, total, swaps, total_swap = \
        ps_mem.get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                                include_self=True, only_self=False)
    if formatted:
        return sizeof_fmt(int(ps_mem.human(total, units=1)))
    else:
        return int(ps_mem.human(total, units=1))
Ejemplo n.º 12
0
    def create_partition(bucket, key, entry):

        if key.endswith('/'):
            logger.debug(
                f'Discarding object "{key}" as it is a prefix folder (0.0B)')
            return

        obj_size = keys_dict[bucket][key]

        if chunk_number:
            chunk_rest = obj_size % chunk_number
            obj_chunk_size = (obj_size // chunk_number) + \
                round((chunk_rest / chunk_number) + 0.5)
        elif chunk_size:
            obj_chunk_size = chunk_size
        else:
            obj_chunk_size = obj_size

        size = total_partitions = 0

        ci = obj_size
        cz = obj_chunk_size
        parts = ci // cz + (ci % cz > 0)
        logger.debug('Creating {} partitions from object {} ({})'.format(
            parts, key, sizeof_fmt(obj_size)))

        while size < obj_size:
            brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
            brange = None if obj_size == obj_chunk_size else brange

            partition = entry.copy()
            partition['obj'] = CloudObject(sb, bucket, key)
            partition['obj'].data_byte_range = brange
            partition['obj'].chunk_size = obj_chunk_size
            partition['obj'].part = total_partitions
            partitions.append(partition)

            total_partitions += 1
            size += obj_chunk_size

        parts_per_object.append(total_partitions)
Ejemplo n.º 13
0
    def _split(entry):
        path = entry['obj']
        file_stats = os.stat(entry['obj'])
        obj_size = int(file_stats.st_size)

        if chunk_number and obj_size:
            chunk_rest = obj_size % chunk_number
            obj_chunk_size = (obj_size // chunk_number) + \
                round((chunk_rest / chunk_number) + 0.5)
        elif chunk_size and obj_size:
            obj_chunk_size = chunk_size
        elif obj_size:
            obj_chunk_size = obj_size
        else:
            obj_chunk_size = obj_size = 1

        size = total_partitions = 0

        ci = obj_size
        cz = obj_chunk_size
        parts = ci // cz + (ci % cz > 0)
        logger.debug('Creating {} partitions from file {} ({})'.format(
            parts, path, sizeof_fmt(obj_size)))

        while size < obj_size:
            brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
            brange = None if obj_size == obj_chunk_size else brange

            partition = entry.copy()
            partition['obj'] = CloudObjectLocal(path)
            partition['obj'].data_byte_range = brange
            partition['obj'].chunk_size = obj_chunk_size
            partition['obj'].part = total_partitions
            partitions.append(partition)

            total_partitions += 1
            size += obj_chunk_size

        parts_per_object.append(total_partitions)
Ejemplo n.º 14
0
def get_memory_usage(formatted=True):
    """
    Gets the current memory usage of the runtime.
    To be used only in the action code.
    """
    from lithops.libs import ps_mem
    if not is_unix_system() or os.geteuid() != 0:
        # Non Unix systems and non root users can't run
        # the ps_mem module
        return

    split_args = False
    pids_to_show = None
    discriminate_by_pid = False

    ps_mem.verify_environment(pids_to_show)
    sorted_cmds, shareds, count, total, swaps, total_swap = \
        ps_mem.get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                                include_self=True, only_self=False)
    if formatted:
        return sizeof_fmt(int(ps_mem.human(total, units=1)))
    else:
        return int(ps_mem.human(total, units=1))
Ejemplo n.º 15
0
 def put_object(self, bucket_name, key, data):
     '''
     Put an object in COS. Override the object if the key already exists.
     :param key: key of the object.
     :param data: data of the object
     :type data: str/bytes
     :return: None
     '''
     try:
         res = self.s3_client.put_object(Bucket=bucket_name,
                                         Key=key,
                                         Body=data)
         status = 'OK' if res['ResponseMetadata'][
             'HTTPStatusCode'] == 200 else 'Error'
         try:
             logger.debug('PUT Object {} - Size: {} - {}'.format(
                 key, sizeof_fmt(len(data)), status))
         except Exception:
             logger.debug('PUT Object {} {}'.format(key, status))
     except botocore.exceptions.ClientError as e:
         if e.response['Error']['Code'] == 'NoSuchKey':
             raise StorageNoSuchKeyError(bucket_name, key)
         else:
             raise e
Ejemplo n.º 16
0
def run_job(job):
    """
    Runs a single job within a separate process
    """
    start_tstamp = time.time()
    setup_lithops_logger(job.log_level)

    logger.info("Lithops v{} - Starting execution".format(__version__))
    logger.info("Execution ID: {}/{}".format(job.job_key, job.call_id))
    logger.debug("Runtime name: {}".format(job.runtime_name))
    if job.runtime_memory:
        logger.debug("Runtime memory: {}MB".format(job.runtime_memory))
    logger.debug("Function timeout: {}s".format(job.execution_timeout))

    env = job.extra_env
    env['LITHOPS_WORKER'] = 'True'
    env['PYTHONUNBUFFERED'] = 'True'
    env['LITHOPS_CONFIG'] = json.dumps(job.config)
    env['PYTHONPATH'] = "{}:{}".format(os.getcwd(), LITHOPS_LIBS_PATH)
    env['__LITHOPS_SESSION_ID'] = '-'.join([job.job_key, job.call_id])
    os.environ.update(env)

    storage_config = extract_storage_config(job.config)
    internal_storage = InternalStorage(storage_config)

    call_status = CallStatus(job.config, internal_storage)
    call_status.response['worker_start_tstamp'] = start_tstamp
    call_status.response['host_submit_tstamp'] = job.host_submit_tstamp
    call_status.response['call_id'] = job.call_id
    call_status.response['job_id'] = job.job_id
    call_status.response['executor_id'] = job.executor_id

    show_memory_peak = strtobool(os.environ.get('SHOW_MEMORY_PEAK', 'False'))

    try:
        if __version__ != job.lithops_version:
            msg = (
                "Lithops version mismatch. Host version: {} - Runtime version: {}"
                .format(job.lithops_version, __version__))
            raise RuntimeError('HANDLER', msg)

        # send init status event
        call_status.send('__init__')

        if show_memory_peak:
            mm_handler_conn, mm_conn = Pipe()
            memory_monitor = Thread(target=memory_monitor_worker,
                                    args=(mm_conn, ))
            memory_monitor.start()

        job.jr_stats_file = os.path.join(job.job_dir, 'jobrunner.stats.txt')
        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(job, jobrunner_conn, internal_storage)
        logger.debug('Starting JobRunner process')
        jrp = Process(target=jobrunner.run) if is_unix_system() else Thread(
            target=jobrunner.run)
        jrp.start()

        jrp.join(job.execution_timeout)
        logger.debug('JobRunner process finished')

        if jrp.is_alive():
            # If process is still alive after jr.join(job_max_runtime), kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Function exceeded maximum time of {} seconds and was '
                   'killed'.format(job.execution_timeout))
            raise TimeoutError('HANDLER', msg)

        if show_memory_peak:
            mm_handler_conn.send('STOP')
            memory_monitor.join()
            peak_memory_usage = int(mm_handler_conn.recv())
            logger.info("Peak memory usage: {}".format(
                sizeof_fmt(peak_memory_usage)))
            call_status.response['peak_memory_usage'] = peak_memory_usage

        if not handler_conn.poll():
            logger.error(
                'No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by jobrunner when it finishes.
            # If no message, this means that the jobrunner process was killed.
            # 99% of times the jobrunner is killed due an OOM, so we assume here an OOM.
            msg = 'Function exceeded maximum memory and was killed'
            raise MemoryError('HANDLER', msg)

        if os.path.exists(job.jr_stats_file):
            with open(job.jr_stats_file, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    if key in [
                            'exception', 'exc_pickle_fail', 'result',
                            'new_futures'
                    ]:
                        call_status.response[key] = eval(value)

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------')
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------')
        call_status.response['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(
            pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        call_status.response['worker_end_tstamp'] = time.time()

        with open(job.log_file, 'rb') as lf:
            log_str = base64.b64encode(zlib.compress(lf.read())).decode()
            call_status.response['logs'] = log_str

        call_status.send('__end__')

        # Unset specific env vars
        for key in job.extra_env:
            os.environ.pop(key, None)
        os.environ.pop('__LITHOPS_TOTAL_EXECUTORS', None)

        logger.info("Finished")
Ejemplo n.º 17
0
    def run(self):
        """
        Runs the function
        """
        # self.stats.write('worker_jobrunner_start_tstamp', time.time())
        logger.info("Started")
        result = None
        exception = False
        try:
            loaded_func_all = self._get_function_and_modules()
            self._save_modules(loaded_func_all['module_data'])
            function = self._unpickle_function(loaded_func_all['func'])
            data = self._load_data()

            if strtobool(os.environ.get('__PW_REDUCE_JOB', 'False')):
                self._wait_futures(data)
            elif is_object_processing_function(function):
                self._load_object(data)

            self._fill_optional_args(function, data)

            logger.info("Going to execute '{}()'".format(str(
                function.__name__)))
            print('---------------------- FUNCTION LOG ----------------------',
                  flush=True)
            function_start_tstamp = time.time()
            result = function(**data)
            function_end_tstamp = time.time()
            print('----------------------------------------------------------',
                  flush=True)
            logger.info("Success function execution")

            self.stats.write('worker_func_start_tstamp', function_start_tstamp)
            self.stats.write('worker_func_end_tstamp', function_end_tstamp)
            self.stats.write(
                'worker_func_exec_time',
                round(function_end_tstamp - function_start_tstamp, 8))

            # Check for new futures
            if result is not None:
                self.stats.write("result", True)
                if isinstance(result, ResponseFuture) or \
                   (type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture)):
                    self.stats.write('new_futures', True)

                logger.debug("Pickling result")
                output_dict = {'result': result}
                pickled_output = pickle.dumps(output_dict)

            else:
                logger.debug("No result to store")
                self.stats.write("result", False)

            # self.stats.write('worker_jobrunner_end_tstamp', time.time())

        except Exception:
            exception = True
            self.stats.write("exception", True)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print('----------------------- EXCEPTION !-----------------------',
                  flush=True)
            traceback.print_exc(file=sys.stdout)
            print('----------------------------------------------------------',
                  flush=True)

            try:
                logger.debug("Pickling exception")
                pickled_exc = pickle.dumps(
                    (exc_type, exc_value, exc_traceback))
                pickle.loads(
                    pickled_exc
                )  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))

            except Exception as pickle_exception:
                # Shockingly often, modules like subprocess don't properly
                # call the base Exception.__init__, which results in them
                # being unpickleable. As a result, we actually wrap this in a try/catch block
                # and more-carefully handle the exceptions if any part of this save / test-reload
                # fails
                self.stats.write("exc_pickle_fail", True)
                pickled_exc = pickle.dumps({
                    'exc_type': str(exc_type),
                    'exc_value': str(exc_value),
                    'exc_traceback': exc_traceback,
                    'pickle_exception': pickle_exception
                })
                pickle.loads(pickled_exc
                             )  # this is just to make sure it can be unpickled
                self.stats.write("exc_info", str(pickled_exc))
        finally:
            store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
            if result is not None and store_result and not exception:
                output_upload_start_tstamp = time.time()
                logger.info("Storing function result - Size: {}".format(
                    sizeof_fmt(len(pickled_output))))
                self.internal_storage.put_data(self.output_key, pickled_output)
                output_upload_end_tstamp = time.time()
                self.stats.write(
                    "worker_result_upload_time",
                    round(
                        output_upload_end_tstamp - output_upload_start_tstamp,
                        8))
            self.jobrunner_conn.send("Finished")
            logger.info("Finished")
Ejemplo n.º 18
0
def _create_job(config,
                internal_storage,
                executor_id,
                job_id,
                func,
                iterdata,
                runtime_meta,
                runtime_memory,
                extra_env,
                include_modules,
                exclude_modules,
                execution_timeout,
                host_job_meta,
                invoke_pool_threads=128):
    """
    :param func: the function to map over the data
    :param iterdata: An iterable of input data
    :param extra_env: Additional environment variables for CF environment. Default None.
    :param extra_meta: Additional metadata to pass to CF. Default None.
    :param remote_invocation: Enable remote invocation. Default False.
    :param invoke_pool_threads: Number of threads to use to invoke.
    :param data_all_as_one: upload the data as a single object. Default True
    :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
    :param exclude_modules: Explicitly keep these modules from pickled dependencies.
    :return: A list with size `len(iterdata)` of futures for each job
    :rtype:  list of futures.
    """
    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    job = SimpleNamespace()
    job.executor_id = executor_id
    job.job_id = job_id
    job.extra_env = ext_env
    job.execution_timeout = execution_timeout or config['lithops'][
        'execution_timeout']
    job.function_name = func.__name__
    job.total_calls = len(iterdata)

    mode = config['lithops']['mode']

    if mode == SERVERLESS:
        job.invoke_pool_threads = invoke_pool_threads or config['serverless'][
            'invoke_pool_threads']
        job.runtime_memory = runtime_memory or config['serverless'][
            'runtime_memory']
        job.runtime_timeout = config['serverless']['runtime_timeout']
        if job.execution_timeout >= job.runtime_timeout:
            job.execution_timeout = job.runtime_timeout - 5

    elif mode == STANDALONE:
        job.runtime_memory = None
        runtime_timeout = config['standalone']['hard_dismantle_timeout']
        if job.execution_timeout >= runtime_timeout:
            job.execution_timeout = runtime_timeout - 10

    elif mode == LOCALHOST:
        job.runtime_memory = None
        job.runtime_timeout = execution_timeout

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug(
        'ExecutorID {} | JobID {} - Serializing function and data'.format(
            executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + iterdata, inc_modules,
                                              exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps(
        {
            'func': func_str,
            'module_data': module_data
        }, -1)
    func_module_size_bytes = len(func_module_str)
    total_size = utils.sizeof_fmt(data_size_bytes + func_module_size_bytes)
    host_job_meta['host_job_serialize_time'] = round(
        time.time() - job_serialize_start, 6)

    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024**2:
        log_msg = (
            'ExecutorID {} | JobID {} - Total data exceeded maximum size '
            'of {}'.format(executor_id, job_id,
                           sizeof_fmt(data_limit * 1024**2)))
        raise Exception(log_msg)

    logger.info('ExecutorID {} | JobID {} - Uploading function and data '
                '- Total: {}'.format(executor_id, job_id, total_size))

    # Upload data
    data_key = create_agg_data_key(JOBS_PREFIX, executor_id, job_id)
    job.data_key = data_key
    data_bytes, data_ranges = utils.agg_data(data_strs)
    job.data_ranges = data_ranges
    data_upload_start = time.time()
    internal_storage.put_data(data_key, data_bytes)
    data_upload_end = time.time()

    host_job_meta['host_data_upload_time'] = round(
        data_upload_end - data_upload_start, 6)
    func_upload_start = time.time()

    # Upload function and modules
    if config[mode].get('customized_runtime'):
        # Prepare function and modules locally to store in the runtime image later
        function_file = func.__code__.co_filename
        function_hash = hashlib.md5(open(function_file,
                                         'rb').read()).hexdigest()[:16]
        mod_hash = hashlib.md5(repr(
            sorted(mod_paths)).encode('utf-8')).hexdigest()[:16]

        uuid = f'{function_hash}{mod_hash}'
        func_key = create_func_key(JOBS_PREFIX, uuid, "")

        _store_func_and_modules(func_key, func_str, module_data)

        job.ext_runtime_uuid = uuid
    else:
        func_key = create_func_key(JOBS_PREFIX, executor_id, job_id)
        internal_storage.put_func(func_key, func_module_str)

    job.func_key = func_key
    func_upload_end = time.time()

    host_job_meta['host_func_upload_time'] = round(
        func_upload_end - func_upload_start, 6)

    host_job_meta['host_job_created_time'] = round(
        time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job.metadata = host_job_meta

    return job
Ejemplo n.º 19
0
def _create_job(config,
                internal_storage,
                executor_id,
                job_id,
                func,
                data,
                runtime_meta,
                runtime_memory=None,
                extra_env=None,
                invoke_pool_threads=128,
                include_modules=[],
                exclude_modules=[],
                execution_timeout=None,
                host_job_meta=None):
    """
    :param func: the function to map over the data
    :param iterdata: An iterable of input data
    :param extra_env: Additional environment variables for CF environment. Default None.
    :param extra_meta: Additional metadata to pass to CF. Default None.
    :param remote_invocation: Enable remote invocation. Default False.
    :param invoke_pool_threads: Number of threads to use to invoke.
    :param data_all_as_one: upload the data as a single object. Default True
    :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
    :param exclude_modules: Explicitly keep these modules from pickled dependencies.
    :return: A list with size `len(iterdata)` of futures for each job
    :rtype:  list of futures.
    """
    log_level = logger.getEffectiveLevel() != logging.WARNING

    runtime_name = config['lithops']['runtime']
    if runtime_memory is None:
        runtime_memory = config['lithops']['runtime_memory']

    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    if not data:
        return []

    if execution_timeout is None:
        execution_timeout = config['lithops']['runtime_timeout'] - 5

    job_description = {}
    job_description['runtime_name'] = runtime_name
    job_description['runtime_memory'] = runtime_memory
    job_description['execution_timeout'] = execution_timeout
    job_description['function_name'] = func.__name__
    job_description['extra_env'] = ext_env
    job_description['total_calls'] = len(data)
    job_description['invoke_pool_threads'] = invoke_pool_threads
    job_description['executor_id'] = executor_id
    job_description['job_id'] = job_id

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug(
        'ExecutorID {} | JobID {} - Serializing function and data'.format(
            executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + data, inc_modules,
                                              exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps(
        {
            'func': func_str,
            'module_data': module_data
        }, -1)
    func_module_size_bytes = len(func_module_str)
    total_size = utils.sizeof_fmt(data_size_bytes + func_module_size_bytes)
    host_job_meta['host_job_serialize_time'] = round(
        time.time() - job_serialize_start, 6)

    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024**2:
        log_msg = (
            'ExecutorID {} | JobID {} - Total data exceeded maximum size '
            'of {}'.format(executor_id, job_id,
                           sizeof_fmt(data_limit * 1024**2)))
        raise Exception(log_msg)

    log_msg = ('ExecutorID {} | JobID {} - Uploading function and data '
               '- Total: {}'.format(executor_id, job_id, total_size))
    logger.info(log_msg)
    if not log_level:
        print(log_msg)

    # Upload data
    data_key = create_agg_data_key(JOBS_PREFIX, executor_id, job_id)
    job_description['data_key'] = data_key
    data_bytes, data_ranges = utils.agg_data(data_strs)
    job_description['data_ranges'] = data_ranges
    data_upload_start = time.time()
    internal_storage.put_data(data_key, data_bytes)
    data_upload_end = time.time()

    host_job_meta['host_data_upload_time'] = round(
        data_upload_end - data_upload_start, 6)

    # Upload function and modules
    func_upload_start = time.time()
    func_key = create_func_key(JOBS_PREFIX, executor_id, job_id)
    job_description['func_key'] = func_key
    internal_storage.put_func(func_key, func_module_str)
    func_upload_end = time.time()

    host_job_meta['host_func_upload_time'] = round(
        func_upload_end - func_upload_start, 6)

    host_job_meta['host_job_created_time'] = round(
        time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job_description['metadata'] = host_job_meta

    return job_description
Ejemplo n.º 20
0
def _create_job(config, internal_storage, executor_id, job_id, func,
                iterdata,  runtime_meta, runtime_memory, extra_env,
                include_modules, exclude_modules, execution_timeout,
                host_job_meta, chunksize=None):
    """
    Creates a new Job
    """
    global FUNCTION_CACHE

    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    mode = config['lithops']['mode']
    backend = config['lithops']['backend']

    job = SimpleNamespace()
    job.chunksize = chunksize or config['lithops']['chunksize']
    job.worker_processes = config[backend]['worker_processes']
    job.execution_timeout = execution_timeout or config['lithops']['execution_timeout']
    job.executor_id = executor_id
    job.job_id = job_id
    job.job_key = create_job_key(job.executor_id, job.job_id)
    job.extra_env = ext_env
    job.function_name = func.__name__ if inspect.isfunction(func) or inspect.ismethod(func) else type(func).__name__
    job.total_calls = len(iterdata)

    if mode == SERVERLESS:
        job.runtime_memory = runtime_memory or config[backend]['runtime_memory']
        job.runtime_timeout = config[backend]['runtime_timeout']
        if job.execution_timeout >= job.runtime_timeout:
            job.execution_timeout = job.runtime_timeout - 5

    elif mode in STANDALONE:
        job.runtime_memory = None
        runtime_timeout = config[STANDALONE]['hard_dismantle_timeout']
        if job.execution_timeout >= runtime_timeout:
            job.execution_timeout = runtime_timeout - 10

    elif mode == LOCALHOST:
        job.runtime_memory = None
        job.runtime_timeout = None

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug('ExecutorID {} | JobID {} - Serializing function and data'.format(executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + iterdata, inc_modules, exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps({'func': func_str, 'module_data': module_data}, -1)
    func_module_size_bytes = len(func_module_str)

    host_job_meta['host_job_serialize_time'] = round(time.time()-job_serialize_start, 6)
    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    # Check data limit
    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE
    if data_limit and data_size_bytes > data_limit*1024**2:
        log_msg = ('ExecutorID {} | JobID {} - Total data exceeded maximum size '
                   'of {}'.format(executor_id, job_id, utils.sizeof_fmt(data_limit*1024**2)))
        raise Exception(log_msg)

    # Upload function and data
    upload_function = not config['lithops'].get('customized_runtime', False)
    upload_data = not (len(str(data_strs[0])) * job.chunksize < 8*1204 and backend in FAAS_BACKENDS)

    # Upload function and modules
    if upload_function:
        function_hash = hashlib.md5(func_module_str).hexdigest()
        job.func_key = create_func_key(executor_id, function_hash)
        if job.func_key not in FUNCTION_CACHE:
            logger.debug('ExecutorID {} | JobID {} - Uploading function and modules '
                         'to the storage backend'.format(executor_id, job_id))
            func_upload_start = time.time()
            internal_storage.put_func(job.func_key, func_module_str)
            func_upload_end = time.time()
            host_job_meta['host_func_upload_time'] = round(func_upload_end - func_upload_start, 6)
            FUNCTION_CACHE.add(job.func_key)
        else:
            logger.debug('ExecutorID {} | JobID {} - Function and modules '
                         'found in local cache'.format(executor_id, job_id))
            host_job_meta['host_func_upload_time'] = 0

    else:
        # Prepare function and modules locally to store in the runtime image later
        function_file = func.__code__.co_filename
        function_hash = hashlib.md5(open(function_file, 'rb').read()).hexdigest()[:16]
        mod_hash = hashlib.md5(repr(sorted(mod_paths)).encode('utf-8')).hexdigest()[:16]
        job.func_key = func_key_suffix
        job.ext_runtime_uuid = '{}{}'.format(function_hash, mod_hash)
        job.local_tmp_dir = os.path.join(CUSTOM_RUNTIME_DIR, job.ext_runtime_uuid)
        _store_func_and_modules(job.local_tmp_dir, job.func_key, func_str, module_data)
        host_job_meta['host_func_upload_time'] = 0

    # upload data
    if upload_data:
        # Upload iterdata to COS only if a single element is greater than 8KB
        logger.debug('ExecutorID {} | JobID {} - Uploading data to the storage backend'
                     .format(executor_id, job_id))
        # pass_iteradata through an object storage file
        data_key = create_data_key(executor_id, job_id)
        job.data_key = data_key
        data_bytes, data_byte_ranges = utils.agg_data(data_strs)
        job.data_byte_ranges = data_byte_ranges
        data_upload_start = time.time()
        internal_storage.put_data(data_key, data_bytes)
        data_upload_end = time.time()
        host_job_meta['host_data_upload_time'] = round(data_upload_end-data_upload_start, 6)

    else:
        # pass iteradata as part of the invocation payload
        logger.debug('ExecutorID {} | JobID {} - Data per activation is < '
                     '{}. Passing data through invocation payload'
                     .format(executor_id, job_id, utils.sizeof_fmt(8*1024)))
        job.data_key = None
        job.data_byte_ranges = None
        job.data_byte_strs = data_strs
        host_job_meta['host_data_upload_time'] = 0

    host_job_meta['host_job_created_time'] = round(time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job.metadata = host_job_meta

    return job
Ejemplo n.º 21
0
def _split_objects_from_keys(map_func_args_list, keys_dict, chunk_size,
                             chunk_number):
    """
    Create partitions from a list of objects keys
    """
    if chunk_number:
        logger.debug('Chunk size set to {}'.format(chunk_size))
    elif chunk_size:
        logger.debug('Chunk number set to {}'.format(chunk_number))
    else:
        logger.debug('Chunk size and chunk number not set ')

    partitions = []
    parts_per_object = []

    for entry in map_func_args_list:
        # each entry is a key
        sb, bucket, prefix, obj_name = utils.split_object_url(entry['obj'])
        key = '/'.join([prefix, obj_name]) if prefix else obj_name

        try:
            obj_size = keys_dict[bucket][key]
        except Exception:
            raise Exception(
                'Object key "{}" does not exist in "{}" bucket'.format(
                    key, bucket))

        if chunk_number:
            chunk_rest = obj_size % chunk_number
            obj_chunk_size = (obj_size // chunk_number) + \
                round((chunk_rest / chunk_number) + 0.5)
        elif chunk_size:
            obj_chunk_size = chunk_size
        else:
            obj_chunk_size = obj_size

        size = total_partitions = 0

        ci = obj_size
        cz = obj_chunk_size
        parts = ci // cz + (ci % cz > 0)
        logger.debug('Creating {} partitions from object {} ({})'.format(
            parts, key, sizeof_fmt(obj_size)))

        while size < obj_size:
            brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
            brange = None if obj_size == obj_chunk_size else brange

            partition = entry.copy()
            partition['obj'] = CloudObject(sb, bucket, key)
            partition['obj'].data_byte_range = brange
            partition['obj'].chunk_size = obj_chunk_size
            partition['obj'].part = total_partitions
            partitions.append(partition)

            total_partitions += 1
            size += obj_chunk_size

        parts_per_object.append(total_partitions)

    return partitions, parts_per_object
Ejemplo n.º 22
0
    def run(self):
        """
        Runs the function
        """
        # self.stats.write('worker_jobrunner_start_tstamp', time.time())
        logger.debug("Process started")
        result = None
        exception = False
        fn_name = None

        try:
            func = pickle.loads(self.job.func)
            data = pickle.loads(self.job.data)

            if strtobool(os.environ.get('__LITHOPS_REDUCE_JOB', 'False')):
                self._wait_futures(data)
            elif is_object_processing_function(func):
                self._load_object(data)

            self._fill_optional_args(func, data)

            fn_name = func.__name__ if inspect.isfunction(func) \
                or inspect.ismethod(func) else type(func).__name__

            self.prometheus.send_metric(name='function_start',
                                        value=time.time(),
                                        type='gauge',
                                        labels=(('job_id', self.job.job_key),
                                                ('call_id', '-'.join([
                                                    self.job.job_key,
                                                    self.job.call_id
                                                ])), ('function_name', fn_name
                                                      or 'undefined')))

            logger.info("Going to execute '{}()'".format(str(fn_name)))
            print('---------------------- FUNCTION LOG ----------------------')
            function_start_tstamp = time.time()
            result = func(**data)
            function_end_tstamp = time.time()
            print('----------------------------------------------------------')
            logger.info("Success function execution")

            self.stats.write('worker_func_start_tstamp', function_start_tstamp)
            self.stats.write('worker_func_end_tstamp', function_end_tstamp)
            self.stats.write(
                'worker_func_exec_time',
                round(function_end_tstamp - function_start_tstamp, 8))

            # Check for new futures
            if result is not None:
                if isinstance(result, ResponseFuture) or isinstance(result, FuturesList) \
                   or (type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture)):
                    self.stats.write('new_futures', pickle.dumps(result))
                    result = None
                else:
                    self.stats.write("result", True)
                    logger.debug("Pickling result")
                    output_dict = {'result': result}
                    pickled_output = pickle.dumps(output_dict)
                    self.stats.write('func_result_size', len(pickled_output))

            if result is None:
                logger.debug("No result to store")
                self.stats.write("result", False)
                self.stats.write('func_result_size', 0)

            # self.stats.write('worker_jobrunner_end_tstamp', time.time())

        except Exception:
            exception = True
            self.stats.write("exception", True)
            exc_type, exc_value, exc_traceback = sys.exc_info()
            print('----------------------- EXCEPTION !-----------------------')
            traceback.print_exc(file=sys.stdout)
            print('----------------------------------------------------------')

            try:
                logger.debug("Pickling exception")
                pickled_exc = pickle.dumps(
                    (exc_type, exc_value, exc_traceback))
                pickle.loads(
                    pickled_exc
                )  # this is just to make sure they can be unpickled
                self.stats.write("exc_info", str(pickled_exc))

            except Exception as pickle_exception:
                # Shockingly often, modules like subprocess don't properly
                # call the base Exception.__init__, which results in them
                # being unpickleable. As a result, we actually wrap this in a try/catch block
                # and more-carefully handle the exceptions if any part of this save / test-reload
                # fails
                self.stats.write("exc_pickle_fail", True)
                pickled_exc = pickle.dumps({
                    'exc_type': str(exc_type),
                    'exc_value': str(exc_value),
                    'exc_traceback': exc_traceback,
                    'pickle_exception': pickle_exception
                })
                pickle.loads(pickled_exc
                             )  # this is just to make sure it can be unpickled
                self.stats.write("exc_info", str(pickled_exc))

        finally:
            self.prometheus.send_metric(name='function_end',
                                        value=time.time(),
                                        type='gauge',
                                        labels=(('job_id', self.job.job_key),
                                                ('call_id', '-'.join([
                                                    self.job.job_key,
                                                    self.job.call_id
                                                ])), ('function_name', fn_name
                                                      or 'undefined')))

            store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
            if result is not None and store_result and not exception:
                output_upload_start_tstamp = time.time()
                logger.info("Storing function result - Size: {}".format(
                    sizeof_fmt(len(pickled_output))))
                self.internal_storage.put_data(self.output_key, pickled_output)
                output_upload_end_tstamp = time.time()
                self.stats.write(
                    "worker_result_upload_time",
                    round(
                        output_upload_end_tstamp - output_upload_start_tstamp,
                        8))
            self.jobrunner_conn.send("Finished")
            logger.info("Process finished")
Ejemplo n.º 23
0
def function_handler(event):
    start_tstamp = time.time()

    log_level = event['log_level']
    cloud_logging_config(log_level)
    logger.debug("Action handler started")

    extra_env = event.get('extra_env', {})
    os.environ.update(extra_env)

    os.environ.update({'LITHOPS_FUNCTION': 'True', 'PYTHONUNBUFFERED': 'True'})
    os.environ.pop('LITHOPS_TOTAL_EXECUTORS', None)

    config = event['config']
    call_id = event['call_id']
    job_id = event['job_id']
    executor_id = event['executor_id']
    exec_id = "{}/{}/{}".format(executor_id, job_id, call_id)
    logger.info("Execution-ID: {}".format(exec_id))

    runtime_name = event['runtime_name']
    runtime_memory = event['runtime_memory']
    execution_timeout = event['execution_timeout']
    logger.debug("Runtime name: {}".format(runtime_name))
    logger.debug("Runtime memory: {}MB".format(runtime_memory))
    logger.debug("Function timeout: {}s".format(execution_timeout))

    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']

    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    call_status = CallStatus(config, internal_storage)
    call_status.response['host_submit_tstamp'] = event['host_submit_tstamp']
    call_status.response['worker_start_tstamp'] = start_tstamp
    context_dict = {
        'python_version': os.environ.get("PYTHON_VERSION"),
        'call_id': call_id,
        'job_id': job_id,
        'executor_id': executor_id,
        'activation_id': os.environ.get('__PW_ACTIVATION_ID')
    }
    call_status.response.update(context_dict)

    show_memory_peak = strtobool(os.environ.get('SHOW_MEMORY_PEAK', 'False'))

    try:
        if version.__version__ != event['lithops_version']:
            msg = (
                "Lithops version mismatch. Host version: {} - Runtime version: {}"
                .format(event['lithops_version'], version.__version__))
            raise RuntimeError('HANDLER', msg)

        # send init status event
        call_status.send('__init__')

        # call_status.response['free_disk_bytes'] = free_disk_space("/tmp")
        custom_env = {
            'LITHOPS_CONFIG': json.dumps(config),
            'LITHOPS_EXECUTION_ID': exec_id,
            'PYTHONPATH': "{}:{}".format(os.getcwd(), LITHOPS_LIBS_PATH)
        }
        os.environ.update(custom_env)

        jobrunner_stats_dir = os.path.join(STORAGE_FOLDER,
                                           storage_config['bucket'],
                                           JOBS_PREFIX, executor_id, job_id,
                                           call_id)
        os.makedirs(jobrunner_stats_dir, exist_ok=True)
        jobrunner_stats_filename = os.path.join(jobrunner_stats_dir,
                                                'jobrunner.stats.txt')

        jobrunner_config = {
            'lithops_config':
            config,
            'call_id':
            call_id,
            'job_id':
            job_id,
            'executor_id':
            executor_id,
            'func_key':
            func_key,
            'data_key':
            data_key,
            'log_level':
            log_level,
            'data_byte_range':
            data_byte_range,
            'output_key':
            create_output_key(JOBS_PREFIX, executor_id, job_id, call_id),
            'stats_filename':
            jobrunner_stats_filename
        }

        if show_memory_peak:
            mm_handler_conn, mm_conn = Pipe()
            memory_monitor = Thread(target=memory_monitor_worker,
                                    args=(mm_conn, ))
            memory_monitor.start()

        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(jobrunner_config, jobrunner_conn,
                              internal_storage)
        logger.debug('Starting JobRunner process')
        local_execution = strtobool(
            os.environ.get('__PW_LOCAL_EXECUTION', 'False'))
        jrp = Thread(target=jobrunner.run) if local_execution else Process(
            target=jobrunner.run)
        jrp.start()

        jrp.join(execution_timeout)
        logger.debug('JobRunner process finished')

        if jrp.is_alive():
            # If process is still alive after jr.join(job_max_runtime), kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Function exceeded maximum time of {} seconds and was '
                   'killed'.format(execution_timeout))
            raise TimeoutError('HANDLER', msg)

        if show_memory_peak:
            mm_handler_conn.send('STOP')
            memory_monitor.join()
            peak_memory_usage = int(mm_handler_conn.recv())
            logger.info("Peak memory usage: {}".format(
                sizeof_fmt(peak_memory_usage)))
            call_status.response['peak_memory_usage'] = peak_memory_usage

        if not handler_conn.poll():
            logger.error(
                'No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by jobrunner when it finishes.
            # If no message, this means that the jobrunner process was killed.
            # 99% of times the jobrunner is killed due an OOM, so we assume here an OOM.
            msg = 'Function exceeded maximum memory and was killed'
            raise MemoryError('HANDLER', msg)

        if os.path.exists(jobrunner_stats_filename):
            with open(jobrunner_stats_filename, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    if key in [
                            'exception', 'exc_pickle_fail', 'result',
                            'new_futures'
                    ]:
                        call_status.response[key] = eval(value)

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------',
              flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------',
              flush=True)
        call_status.response['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(
            pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        call_status.response['worker_end_tstamp'] = time.time()
        call_status.send('__end__')

        for key in extra_env:
            os.environ.pop(key)

        logger.info("Finished")
Ejemplo n.º 24
0
def _create_job(config,
                internal_storage,
                executor_id,
                job_id,
                func,
                iterdata,
                runtime_meta,
                runtime_memory,
                extra_env,
                include_modules,
                exclude_modules,
                execution_timeout,
                host_job_meta,
                chunksize=None,
                worker_processes=None,
                invoke_pool_threads=16):
    """
    Creates a new Job
    """
    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    job = SimpleNamespace()
    job.chunksize = chunksize or config['lithops']['chunksize']
    job.worker_processes = worker_processes or config['lithops'][
        'worker_processes']
    job.execution_timeout = execution_timeout or config['lithops'][
        'execution_timeout']
    job.executor_id = executor_id
    job.job_id = job_id
    job.job_key = create_job_key(job.executor_id, job.job_id)
    job.extra_env = ext_env
    job.function_name = func.__name__
    job.total_calls = len(iterdata)

    mode = config['lithops']['mode']

    if mode == SERVERLESS:
        job.invoke_pool_threads = invoke_pool_threads or config['serverless'][
            'invoke_pool_threads']
        job.runtime_memory = runtime_memory or config['serverless'][
            'runtime_memory']
        job.runtime_timeout = config['serverless']['runtime_timeout']
        if job.execution_timeout >= job.runtime_timeout:
            job.execution_timeout = job.runtime_timeout - 5

    elif mode == STANDALONE:
        job.runtime_memory = None
        runtime_timeout = config['standalone']['hard_dismantle_timeout']
        if job.execution_timeout >= runtime_timeout:
            job.execution_timeout = runtime_timeout - 10

    elif mode == LOCALHOST:
        job.runtime_memory = None
        job.runtime_timeout = execution_timeout

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug(
        'ExecutorID {} | JobID {} - Serializing function and data'.format(
            executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + iterdata, inc_modules,
                                              exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps(
        {
            'func': func_str,
            'module_data': module_data
        }, -1)
    func_module_size_bytes = len(func_module_str)
    total_size = utils.sizeof_fmt(data_size_bytes + func_module_size_bytes)
    host_job_meta['host_job_serialize_time'] = round(
        time.time() - job_serialize_start, 6)

    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024**2:
        log_msg = (
            'ExecutorID {} | JobID {} - Total data exceeded maximum size '
            'of {}'.format(executor_id, job_id,
                           sizeof_fmt(data_limit * 1024**2)))
        raise Exception(log_msg)

    logger.info('ExecutorID {} | JobID {} - Uploading function and data '
                '- Total: {}'.format(executor_id, job_id, total_size))

    # Upload data
    data_key = create_agg_data_key(JOBS_PREFIX, executor_id, job_id)
    job.data_key = data_key
    data_bytes, data_byte_ranges = utils.agg_data(data_strs)
    job.data_byte_ranges = data_byte_ranges
    data_upload_start = time.time()
    internal_storage.put_data(data_key, data_bytes)
    data_upload_end = time.time()

    host_job_meta['host_data_upload_time'] = round(
        data_upload_end - data_upload_start, 6)
    func_upload_start = time.time()

    # Upload function and modules
    if config[mode].get('customized_runtime'):
        # Prepare function and modules locally to store in the runtime image later
        function_file = func.__code__.co_filename
        function_hash = hashlib.md5(open(function_file,
                                         'rb').read()).hexdigest()[:16]
        mod_hash = hashlib.md5(repr(
            sorted(mod_paths)).encode('utf-8')).hexdigest()[:16]

        uuid = '{}{}'.format(function_hash, mod_hash)
        func_key = create_func_key(JOBS_PREFIX, uuid, "")

        _store_func_and_modules(func_key, func_str, module_data)

        job.ext_runtime_uuid = uuid
    else:
        func_key = create_func_key(JOBS_PREFIX, executor_id, job_id)
        internal_storage.put_func(func_key, func_module_str)

    job.func_key = func_key
    func_upload_end = time.time()

    host_job_meta['host_func_upload_time'] = round(
        func_upload_end - func_upload_start, 6)

    host_job_meta['host_job_created_time'] = round(
        time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job.metadata = host_job_meta

    return job