Example 1
    def _invoke_job(self, job):
        """
        Normal Invocation
        Use local threads to perform all the function invocations
        """
        if self.remote_invoker:
            return self._invoke_job_remote(job)

        if self.should_run is False:
            self.running_workers = 0
            self.should_run = True
            self._start_async_invokers()

        if self.running_workers < self.workers:
            free_workers = self.workers - self.running_workers
            total_direct = free_workers * job.chunksize
            callids = range(job.total_calls)
            callids_to_invoke_direct = callids[:total_direct]
            callids_to_invoke_nondirect = callids[total_direct:]

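            # How many workers the direct batch consumes: ceil(ci / cz)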
            ci = len(callids_to_invoke_direct)
            cz = job.chunksize
            consumed_workers = ci // cz + (ci % cz > 0)
            self.running_workers += consumed_workers

            logger.debug('ExecutorID {} | JobID {} - Free workers:'
                         ' {} - Going to run {} activations in {} workers'
                         .format(job.executor_id, job.job_id, free_workers,
                                 len(callids_to_invoke_direct), consumed_workers))

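            # result() re-raises any exception raised in the invocation thread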
            def _callback(future):
                future.result()

            invoke_futures = []
            executor = ThreadPoolExecutor(job.invoke_pool_threads)
            for call_ids_range in iterchunks(callids_to_invoke_direct, job.chunksize):
                future = executor.submit(self._invoke_task, job, call_ids_range)
                future.add_done_callback(_callback)
                invoke_futures.append(future)

            if self.sync:
            for f in invoke_futures:
                f.result()

            # Put into the queue the rest of the callids to invoke within the process
            if callids_to_invoke_nondirect:
                logger.debug('ExecutorID {} | JobID {} - Putting remaining '
                             '{} function activations into pending queue'
                             .format(job.executor_id, job.job_id,
                                     len(callids_to_invoke_nondirect)))
                for call_ids_range in iterchunks(callids_to_invoke_nondirect, job.chunksize):
                    self.pending_calls_q.put((job, call_ids_range))
        else:
            logger.debug('ExecutorID {} | JobID {} - Reached maximum {} '
                         'workers, queuing {} function activations'
                         .format(job.executor_id, job.job_id,
                                 self.workers, job.total_calls))
            for call_ids_range in iterchunks(range(job.total_calls), job.chunksize):
                self.pending_calls_q.put((job, call_ids_range))
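
Every example in this section splits the list of call IDs into fixed-size batches with the iterchunks helper, which the snippets never define. A minimal sketch of the expected behavior, assuming it simply yields consecutive slices of at most chunksize items (the real Lithops utility may differ in details):

from typing import Iterable, Iterator, List, TypeVar

T = TypeVar('T')

def iterchunks(iterable: Iterable[T], chunksize: int) -> Iterator[List[T]]:
    """Yield consecutive chunks of at most `chunksize` items (sketch)."""
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == chunksize:
            yield chunk
            chunk = []
    if chunk:  # emit the final, possibly shorter, chunk
        yield chunk

# 7 call IDs with chunksize 3 -> [[0, 1, 2], [3, 4, 5], [6]]
print(list(iterchunks(range(7), 3)))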
Example 2
def run_job_process(job_payload, work_queue):
    """
    Process responsible for waiting for workers to become ready and for
    submitting the individual tasks of the job to them
    """
    job_key = job_payload['job_key']
    call_ids = job_payload['call_ids']
    chunksize = job_payload['chunksize']
    workers = job_payload['worker_instances']

    for call_ids_range in iterchunks(call_ids, chunksize):
        task_payload = copy.deepcopy(job_payload)
        dbr = task_payload['data_byte_ranges']
        task_payload['call_ids'] = call_ids_range
        task_payload['data_byte_ranges'] = [
            dbr[int(call_id)] for call_id in call_ids_range
        ]
        work_queue.put(task_payload)

    logger.info("Total tasks in {} work queue: {}".format(
        job_key, work_queue.qsize()))

    with ThreadPoolExecutor(len(workers)) as executor:
        for worker_info in workers:
            executor.submit(setup_worker, worker_info, work_queue, job_key)

    logger.info('All workers set up for job {}'.format(job_key))

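    # Busy-wait until every queued task has been picked up by a worker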
    while not work_queue.empty():
        time.sleep(1)

    done = os.path.join(JOBS_DIR, job_key + '.done')
    Path(done).touch()

    logger.info('Finished job {} invocation.'.format(job_key))
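
Example 2 submits one setup_worker call per VM, all sharing the same work queue; the helper itself is not shown in these snippets. A purely hypothetical sketch, assuming each worker simply drains task payloads from the queue until it is empty (the real Lithops helper provisions the VM and forwards each payload to it):

import queue

def setup_worker(worker_info, work_queue, job_key):
    """Hypothetical consumer loop; not the real Lithops helper."""
    while True:
        try:
            task_payload = work_queue.get_nowait()
        except queue.Empty:
            break  # queue drained: this worker is done
        # The real helper would send task_payload to the worker VM
        # described by worker_info and wait for it to be accepted.
        print(f'{job_key}: dispatching calls {task_payload["call_ids"]}')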
Example 3
def main_job(action, encoded_payload):
    logger.info(
        "Lithops v{} - Starting Code Engine execution".format(__version__))

    payload = b64str_to_dict(encoded_payload)

    setup_lithops_logger(payload['log_level'])

    if action == 'preinstalls':
        runtime_packages(payload)
        return {"Execution": "Finished"}

    job_index = int(os.environ['JOB_INDEX'])
    payload['JOB_INDEX'] = job_index
    logger.info("Action {}. Job Index {}".format(action, job_index))

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    os.environ['__LITHOPS_ACTIVATION_ID'] = act_id

    chunksize = payload['chunksize']
    call_ids_ranges = list(iterchunks(payload['call_ids'], chunksize))
    call_ids = call_ids_ranges[job_index]
    data_byte_ranges = [
        payload['data_byte_ranges'][int(call_id)] for call_id in call_ids
    ]

    payload['call_ids'] = call_ids
    payload['data_byte_ranges'] = data_byte_ranges

    function_handler(payload)

    return {"Execution": "Finished"}
Example 4
def run_job(encoded_payload):
    logger.info(
        "Lithops v{} - Starting kubernetes execution".format(__version__))

    payload = b64str_to_dict(encoded_payload)
    setup_lithops_logger(payload['log_level'])

    job_key = payload['job_key']
    idgiver_ip = os.environ['IDGIVER_POD_IP']
    res = requests.get('http://{}:{}/getid/{}'.format(idgiver_ip, IDGIVER_PORT,
                                                      job_key))
    job_index = int(res.text)

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    os.environ['__LITHOPS_ACTIVATION_ID'] = act_id
    os.environ['__LITHOPS_BACKEND'] = 'k8s'

    logger.info("Activation ID: {} - Job Index: {}".format(act_id, job_index))

    chunksize = payload['chunksize']
    call_ids_ranges = list(iterchunks(payload['call_ids'], chunksize))
    call_ids = call_ids_ranges[job_index]
    data_byte_ranges = [
        payload['data_byte_ranges'][int(call_id)] for call_id in call_ids
    ]

    payload['call_ids'] = call_ids
    payload['data_byte_ranges'] = data_byte_ranges

    function_handler(payload)
Example 5
    def run(self, job_payload):
        """
        Run a job described in job_payload
        """
        job = SimpleNamespace(**job_payload)

        job.total_calls = len(job.call_ids)

        logger.info('ExecutorID {} | JobID {} - Starting function '
                    'invocation - Total: {} activations'
                    .format(job.executor_id, job.job_id, job.total_calls))

        logger.info('ExecutorID {} | JobID {} - Chunksize:'
                    ' {} - Worker processes: {}'
                    .format(job.executor_id, job.job_id,
                            job.chunksize, job.worker_processes))

        for _ in range(self.num_workers):
            self.token_bucket_q.put('#')

        for call_ids_range in iterchunks(job.call_ids, job.chunksize):
            self.pending_calls_q.put((job, call_ids_range))

        self.job_monitor.start_job_monitoring(job)

        invokers = []
        for inv_id in range(self.num_invokers):
            p = mp.Process(target=self._run_process, args=(inv_id, ))
            p.daemon = True
            p.start()
            invokers.append(p)

        for p in invokers:
            p.join()
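
Example 5 primes token_bucket_q with one token per worker before starting the invoker processes, so each in-flight invocation must first claim a token. In the real system the job monitor puts a token back when an activation finishes; a standalone sketch of the throttling idea, using threads and hypothetical names (invoker, NUM_WORKERS), with the invoker returning the token itself for simplicity:

import queue
import threading
import time

NUM_WORKERS = 2
token_bucket_q = queue.Queue()
pending_calls_q = queue.Queue()

# Prime the bucket: one token per available worker, as in Example 5
for _ in range(NUM_WORKERS):
    token_bucket_q.put('#')

def invoker(inv_id):
    while True:
        call_ids_range = pending_calls_q.get()
        if call_ids_range is None:   # sentinel: no more work
            break
        token_bucket_q.get()         # blocks until a worker slot is free
        print(f'invoker {inv_id} running {call_ids_range}')
        time.sleep(0.1)              # simulate the remote invocation
        token_bucket_q.put('#')      # simplified: invoker returns the token

for chunk in ([0, 1], [2, 3], [4]):
    pending_calls_q.put(chunk)
for _ in range(2):
    pending_calls_q.put(None)        # one sentinel per invoker

threads = [threading.Thread(target=invoker, args=(i,)) for i in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()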
Example 6
def run_job_worker(job_payload, work_queue):
    """
    Process responsible for waiting for workers to become ready and for
    submitting the individual tasks of the job to them
    """
    job_key = job_payload['job_key']
    call_ids = job_payload['call_ids']
    chunksize = job_payload['chunksize']

    for call_ids_range in iterchunks(call_ids, chunksize):
        task_payload = copy.deepcopy(job_payload)
        dbr = task_payload['data_byte_ranges']
        task_payload['call_ids'] = call_ids_range
        task_payload['data_byte_ranges'] = [
            dbr[int(call_id)] for call_id in call_ids_range
        ]
        work_queue.put(task_payload)

    while not work_queue.empty():
        time.sleep(1)

    done = os.path.join(JOBS_DIR, job_key + '.done')
    Path(done).touch()

    logger.debug(f'Job process {job_key} finished')
Example 7
def run_job(encoded_payload):
    logger.info(
        "Lithops v{} - Starting kubernetes execution".format(__version__))

    payload = b64str_to_dict(encoded_payload)
    setup_lithops_logger(payload['log_level'])

    total_calls = payload['total_calls']
    job_key = payload['job_key']
    master_ip = os.environ['MASTER_POD_IP']

    chunksize = payload['chunksize']
    call_ids_ranges = list(iterchunks(payload['call_ids'], chunksize))
    data_byte_ranges = payload['data_byte_ranges']

    job_finished = False
    while not job_finished:
        job_index = None

        while job_index is None:
            try:
                url = f'http://{master_ip}:{MASTER_PORT}/getid/{job_key}/{total_calls}'
                res = requests.get(url)
                job_index = int(res.text)
            except Exception:
                time.sleep(0.1)

        if job_index == -1:
            job_finished = True
            continue

        act_id = str(uuid.uuid4()).replace('-', '')[:12]
        os.environ['__LITHOPS_ACTIVATION_ID'] = act_id
        os.environ['__LITHOPS_BACKEND'] = 'k8s'

        logger.info("Activation ID: {} - Job Index: {}".format(
            act_id, job_index))

        call_ids = call_ids_ranges[job_index]
        dbr = [data_byte_ranges[int(call_id)] for call_id in call_ids]
        payload['call_ids'] = call_ids
        payload['data_byte_ranges'] = dbr

        function_handler(payload)
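
Unlike Example 4, where each pod receives a fixed job index, Example 7 has every pod loop and ask the master for the next unclaimed index until the master answers -1. The master endpoint is not part of these snippets; a hypothetical, heavily simplified Flask sketch (it treats the URL parameter as the number of chunks and ignores concurrency, both of which the real Lithops master must handle):

import flask

app = flask.Flask(__name__)
next_index = {}  # job_key -> next chunk index to hand out

@app.route('/getid/<job_key>/<int:total_chunks>')
def getid(job_key, total_chunks):
    # Hand out consecutive chunk indices; -1 tells the pod the job is done.
    index = next_index.get(job_key, 0)
    if index >= total_chunks:
        return '-1'
    next_index[job_key] = index + 1
    return str(index)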
Example 8
def run_create():
    """
    Runs a given job remotely in workers, in create mode
    """
    global BUDGET_KEEPER

    logger.info('Running job on worker VMs')

    job_payload = flask.request.get_json(force=True, silent=True)
    if job_payload and not isinstance(job_payload, dict):
        return error('The action did not receive a dictionary as an argument.')

    try:
        runtime = job_payload['runtime_name']
        verify_runtime_name(runtime)
    except Exception as e:
        return error(str(e))

    job_key = job_payload['job_key']
    call_ids = job_payload['call_ids']
    chunksize = job_payload['chunksize']
    workers = job_payload['worker_instances']

    BUDGET_KEEPER.last_usage_time = time.time()
    BUDGET_KEEPER.update_config(job_payload['config']['standalone'])
    BUDGET_KEEPER.jobs[job_key] = 'running'

    with ThreadPoolExecutor(len(workers)) as executor:
        for call_ids_range in iterchunks(call_ids, chunksize):
            worker_info = workers.pop(0)
            executor.submit(run_job_on_worker, worker_info, call_ids_range,
                            copy.deepcopy(job_payload))

    done = os.path.join(JOBS_DIR, job_key + '.done')
    Path(done).touch()

    act_id = str(uuid.uuid4()).replace('-', '')[:12]
    response = flask.jsonify({'activationId': act_id})
    response.status_code = 202

    return response
Example 9
    def run(self, job):
        """
        Run the job described by the given job object
        """

        job.runtime_name = self.runtime_name

        try:
            while True:
                self.token_bucket_q.get_nowait()
                self.ongoing_activations -= 1
        except Exception:
            pass

        self.prometheus.send_metric(name='job_total_calls',
                                    value=job.total_calls,
                                    labels=(('executor_id', job.executor_id),
                                            ('job_id',
                                             job.job_id), ('function_name',
                                                           job.function_name)))

        if self.remote_invoker:
            """
            Remote Invocation
            Use a single cloud function to perform all the function invocations
            """
            log_level = logger.getEffectiveLevel()
            logging.getLogger('lithops').setLevel(logging.CRITICAL)
            self.select_runtime(job.job_id, self.REMOTE_INVOKER_MEMORY)
            logging.getLogger('lithops').setLevel(log_level)
            log_msg = ('ExecutorID {} | JobID {} - Starting function '
                       'invocation: {}() - Total: {} activations'.format(
                           job.executor_id, job.job_id, job.function_name,
                           job.total_calls))
            logger.info(log_msg)
            self._invoke_remote(job)
            # th = Thread(target=self._invoke_remote, args=(job,), daemon=True)
            # th.start()

        else:
            """
            Normal Invocation
            Use local threads to perform all the function invocations
            """
            try:
                if self.running_flag.value == 0:
                    self.running_workers = 0
                    self.running_flag.value = 1
                    self._start_invoker_process()

                log_msg = ('ExecutorID {} | JobID {} - Starting function '
                           'invocation: {}() - Total: {} activations'.format(
                               job.executor_id, job.job_id, job.function_name,
                               job.total_calls))
                logger.info(log_msg)

                logger.debug('ExecutorID {} | JobID {} - Chunksize:'
                             ' {} - Worker processes: {}'.format(
                                 job.executor_id, job.job_id, job.chunksize,
                                 job.worker_processes))

                if self.running_workers < self.workers:
                    free_workers = self.workers - self.running_workers
                    total_direct = free_workers * job.chunksize
                    callids = range(job.total_calls)
                    callids_to_invoke_direct = callids[:total_direct]
                    callids_to_invoke_nondirect = callids[total_direct:]

                    ci = len(callids_to_invoke_direct)
                    cz = job.chunksize
                    consumed_workers = ci // cz + (ci % cz > 0)
                    self.running_workers += consumed_workers

                    logger.debug(
                        'ExecutorID {} | JobID {} - Free workers:'
                        ' {} - Going to run {} activations in {} workers'.
                        format(job.executor_id, job.job_id, free_workers,
                               len(callids_to_invoke_direct),
                               consumed_workers))

                    def _callback(future):
                        future.result()

                    executor = ThreadPoolExecutor(job.invoke_pool_threads)
                    for call_ids_range in iterchunks(callids_to_invoke_direct,
                                                     job.chunksize):
                        future = executor.submit(self._invoke, job,
                                                 call_ids_range)
                        future.add_done_callback(_callback)
                    time.sleep(0.1)

                    # Put into the queue the rest of the callids to invoke within the process
                    if callids_to_invoke_nondirect:
                        logger.debug(
                            'ExecutorID {} | JobID {} - Putting remaining '
                            '{} function activations into pending queue'.
                            format(job.executor_id, job.job_id,
                                   len(callids_to_invoke_nondirect)))
                        for call_ids_range in iterchunks(
                                callids_to_invoke_nondirect, job.chunksize):
                            self.pending_calls_q.put((job, call_ids_range))
                else:
                    logger.debug(
                        'ExecutorID {} | JobID {} - Reached maximum {} '
                        'workers, queuing {} function activations'.format(
                            job.executor_id, job.job_id, self.workers,
                            job.total_calls))
                    for call_ids_range in iterchunks(range(job.total_calls),
                                                     job.chunksize):
                        self.pending_calls_q.put((job, call_ids_range))

                self.job_monitor.start_job_monitoring(job)

            except (KeyboardInterrupt, Exception) as e:
                self.stop()
                raise e

        log_file = os.path.join(LOGS_DIR, job.job_key + '.log')
        logger.info(
            "ExecutorID {} | JobID {} - View execution logs at {}".format(
                job.executor_id, job.job_id, log_file))

        # Create all futures
        futures = []
        for i in range(job.total_calls):
            call_id = "{:05d}".format(i)
            fut = ResponseFuture(call_id, job, job.metadata.copy(),
                                 self.storage_config)
            fut._set_state(ResponseFuture.State.Invoked)
            futures.append(fut)

        return futures
Example 10
        lithops_payload = json.loads(lithops_payload_json)
        setup_lithops_logger(lithops_payload.get('log_level', logging.INFO))

        logger.info(
            "Lithops v{} - Starting AWS Batch execution".format(__version__))

        job_index = int(os.environ.get('AWS_BATCH_JOB_ARRAY_INDEX', 0))
        lithops_payload['JOB_INDEX'] = job_index
        logger.info('Job index {}'.format(job_index))

        act_id = str(uuid.uuid4()).replace('-', '')[:12]
        os.environ['__LITHOPS_ACTIVATION_ID'] = act_id

        chunksize = lithops_payload['chunksize']
        call_ids_ranges = list(
            iterchunks(lithops_payload['call_ids'], chunksize))
        call_ids = call_ids_ranges[job_index]
        data_byte_ranges = [
            lithops_payload['data_byte_ranges'][int(call_id)]
            for call_id in call_ids
        ]

        lithops_payload['call_ids'] = call_ids
        lithops_payload['data_byte_ranges'] = data_byte_ranges

        function_handler(lithops_payload)
    else:
        raise Exception('Unknown action {}'.format(action))
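
Examples 3, 4, 7 and 10 all receive the job description as a base64-encoded JSON string and decode it with b64str_to_dict. A minimal round-trip sketch of that helper, assuming standard base64 over UTF-8 JSON (the real Lithops utility may differ in details):

import base64
import json

def b64str_to_dict(encoded: str) -> dict:
    """Decode a base64-encoded JSON string into a dict (sketch)."""
    return json.loads(base64.b64decode(encoded.encode()).decode())

# Round trip with a made-up payload:
payload = {'job_key': 'A01', 'call_ids': ['00000'], 'chunksize': 1}
encoded = base64.b64encode(json.dumps(payload).encode()).decode()
assert b64str_to_dict(encoded) == payload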