예제 #1
0
def send_message(driver, error_handler, message):
    """Serialize a message and hand it to the driver as a framework message.

    The message is JSON-encoded, converted to UTF-8 bytes, passed through
    pm.encode_data and then given to driver.sendFrameworkMessage.

    Note: no exception escapes this function. Any exception raised while
    logging, encoding or sending is either forwarded to error_handler (when
    cu.is_out_of_memory_error reports it as an out-of-memory error) or logged.

    Parameters
    ----------
    driver: MesosExecutorDriver
        The driver to send the message to.
    error_handler: fn(os_error)
        Callback invoked with the exception for out of memory situations.
    message: dictionary
        The raw message to send.

    Returns
    -------
    True when the driver call completed without raising, False otherwise.
    """
    try:
        logging.info('Sending framework message {}'.format(message))
        payload = pm.encode_data(json.dumps(message).encode('utf8'))
        driver.sendFrameworkMessage(payload)
    except Exception as send_error:
        if not cu.is_out_of_memory_error(send_error):
            logging.exception(
                'Exception while sending message {}'.format(message))
        else:
            # out-of-memory failures are delegated to the caller's handler
            error_handler(send_error)
        return False
    else:
        return True
예제 #2
0
파일: progress.py 프로젝트: yueri/Cook
    def retrieve_progress_states(self):
        """Generate progress states from the lines produced by self.tail().

        Nothing is tailed unless self.progress_regex_string is truthy. Each
        tailed line is passed to self.match_progress_update(); a result of
        None means the line carries no progress report and is ignored.

        While self.task_completed_signal is not set, every report is applied
        right away and self.progress is yielded whenever the update is
        accepted. Once the signal is set, reports are only remembered (the
        latest one wins) and the remembered report is applied after tailing
        has finished.

        Note: exceptions identified by cu.is_out_of_memory_error are re-raised
        to the caller; any other exception raised while handling a line is
        logged and that line is skipped.

        Returns
        -------
        A generator that yields self.progress after each accepted update.
        """
        pending_report = None
        if self.progress_regex_string:
            poll_interval_ms = 50
            for line in self.tail(poll_interval_ms):
                try:
                    report = self.match_progress_update(line)
                    if report is None:
                        continue
                    if self.task_completed_signal.isSet():
                        # defer processing; only the most recent report is kept
                        pending_report = report
                    elif self.__update_progress(report):
                        yield self.progress
                except Exception as exception:
                    if not cu.is_out_of_memory_error(exception):
                        logging.exception('Skipping "%s" as a progress entry',
                                          line)
                    else:
                        raise exception
        if pending_report is not None and self.__update_progress(pending_report):
            yield self.progress
예제 #3
0
파일: progress.py 프로젝트: yueri/Cook
 def track_progress(self):
     """Forward every progress state from the watcher to the updater.

     Iterates self.watcher.retrieve_progress_states() and passes each state to
     self.updater.send_progress_update(). Exceptions identified by
     cu.is_out_of_memory_error are handed to self.os_error_handler; any other
     exception is logged together with self.location_tag. In every case
     self.progress_complete_event is set before returning."""
     try:
         for progress_state in self.watcher.retrieve_progress_states():
             self.updater.send_progress_update(progress_state)
     except Exception as tracking_error:
         if not cu.is_out_of_memory_error(tracking_error):
             logging.exception('Exception while tracking progress [tag=%s]',
                               self.location_tag)
         else:
             self.os_error_handler(tracking_error)
     finally:
         # always signal completion, even when tracking failed
         self.progress_complete_event.set()
예제 #4
0
def os_error_handler(stop_signal, status_updater, os_error):
    """Handle an OSError by requesting termination and failing the task.

    Sets the stop signal, logs the failure, reports TASK_FAILED through the
    status updater and finally prints the memory usage.

    Parameters
    ----------
    stop_signal: threading.Event
        Event that determines if the process was requested to terminate.
    status_updater: StatusUpdater
        Wrapper object that sends task status messages.
    os_error: OSError
        The error being handled; it decides the failure reason that is sent.

    Returns
    -------
    Nothing
    """
    stop_signal.set()
    logging.exception('OSError generated, requesting process to terminate')
    # only out-of-memory errors carry an explicit failure reason
    if cu.is_out_of_memory_error(os_error):
        failure_reason = cook.REASON_CONTAINER_LIMITATION_MEMORY
    else:
        failure_reason = None
    status_updater.update_status(cook.TASK_FAILED, reason=failure_reason)
    cu.print_memory_usage()
예제 #5
0
def manage_task(driver, task, stop_signal, completed_signal, config):
    """Manages the execution of a task waiting for it to terminate normally or be killed.
       It also sends the task status updates, sandbox location and exit code back to the scheduler.
       Progress updates are tracked on a separate thread and are also sent to the scheduler.
       Setting the stop_signal will trigger termination of the task and associated cleanup.

    Parameters
    ----------
    driver: MesosExecutorDriver
        The driver used to send status updates and framework messages.
    task: dictionary
        The task to execute; its id, environment and command are derived
        from it by get_task_id, retrieve_process_environment and launch_task.
    stop_signal: threading.Event
        Event that determines if the process was requested to terminate.
    completed_signal: threading.Event
        Event that is always set before this function returns.
        NOTE(review): only .set() is called on it here - confirm the type.
    config: ExecutorConfig
        Supplies mesos_directory, max_message_length,
        progress_sample_interval_ms, progress_output_name, stderr_file(),
        stdout_file() and shutdown_grace_period_ms.
        NOTE(review): type name assumed - confirm against the caller.

    Returns
    -------
    Nothing
    """
    launched_process = None
    task_id = get_task_id(task)
    cio.print_and_log('Starting task {}'.format(task_id))
    status_updater = StatusUpdater(driver, task_id)

    inner_os_error_handler = functools.partial(os_error_handler, stop_signal,
                                               status_updater)
    try:
        # not yet started to run the task
        status_updater.update_status(cook.TASK_STARTING)

        # Use MESOS_DIRECTORY instead of MESOS_SANDBOX, to report the sandbox location outside of the container
        sandbox_message = {
            'sandbox-directory': config.mesos_directory,
            'task-id': task_id,
            'type': 'directory'
        }
        send_message(driver, inner_os_error_handler, sandbox_message)

        environment = retrieve_process_environment(config, task, os.environ)
        launched_process = launch_task(task, environment)
        if launched_process:
            # task has begun running successfully
            status_updater.update_status(cook.TASK_RUNNING)
            cio.print_and_log('Forked command at {}'.format(
                launched_process.pid))
        else:
            # task launch failed, report an error
            logging.error('Error in launching task')
            status_updater.update_status(cook.TASK_ERROR,
                                         reason=cook.REASON_TASK_INVALID)
            return

        # event to track task execution completion
        task_completed_signal = Event()
        sequence_counter = cp.ProgressSequenceCounter()

        send_progress_message = functools.partial(send_message, driver,
                                                  inner_os_error_handler)
        max_message_length = config.max_message_length
        sample_interval_ms = config.progress_sample_interval_ms
        progress_updater = cp.ProgressUpdater(task_id, max_message_length,
                                              sample_interval_ms,
                                              send_progress_message)
        progress_termination_signal = Event()

        def launch_progress_tracker(progress_location, location_tag):
            """Create and start a progress tracker for a single location."""
            logging.info('Location {} tagged as [tag={}]'.format(
                progress_location, location_tag))
            progress_tracker = cp.ProgressTracker(
                config, stop_signal, task_completed_signal, sequence_counter,
                progress_updater, progress_termination_signal,
                progress_location, location_tag, inner_os_error_handler)
            progress_tracker.start()
            return progress_tracker

        # keyed by location, so a location configured twice is tracked once
        progress_locations = {
            config.progress_output_name: 'progress',
            config.stderr_file(): 'stderr',
            config.stdout_file(): 'stdout'
        }
        logging.info('Progress will be tracked from {} locations'.format(
            len(progress_locations)))
        progress_trackers = [
            launch_progress_tracker(location, tag)
            for location, tag in progress_locations.items()
        ]

        await_process_completion(launched_process, stop_signal,
                                 config.shutdown_grace_period_ms)
        task_completed_signal.set()

        # set the progress termination signal once the shutdown grace period
        # (milliseconds, converted to seconds for Timer) has elapsed
        progress_termination_timer = Timer(
            config.shutdown_grace_period_ms / 1000.0,
            progress_termination_signal.set)
        progress_termination_timer.daemon = True
        progress_termination_timer.start()

        # propagate the exit code
        exit_code = launched_process.returncode
        cio.print_and_log('Command exited with status {} (pid: {})'.format(
            exit_code, launched_process.pid))

        exit_message = {'exit-code': exit_code, 'task-id': task_id}
        send_message(driver, inner_os_error_handler, exit_message)

        # await progress updater termination if executor is terminating normally
        if not stop_signal.is_set():
            logging.info('Awaiting completion of progress updaters')
            for progress_tracker in progress_trackers:
                progress_tracker.wait()
            logging.info('Progress updaters completed')

        # force send the latest progress state if available
        for progress_tracker in progress_trackers:
            progress_tracker.force_send_progress_update()

        # task either completed successfully or aborted with an error
        task_state = get_task_state(exit_code)
        output_task_completion(task_id, task_state)
        status_updater.update_status(task_state)

    except Exception as exception:
        if cu.is_out_of_memory_error(exception):
            inner_os_error_handler(exception)
        else:
            # task aborted with an error
            logging.exception('Error in executing task')
            output_task_completion(task_id, cook.TASK_FAILED)
            status_updater.update_status(
                cook.TASK_FAILED, reason=cook.REASON_EXECUTOR_TERMINATED)

    finally:
        # ensure completed_signal is set so driver can stop
        completed_signal.set()
        # do not leave the launched process behind if it is still running
        if launched_process and cs.is_process_running(launched_process):
            cs.send_signal(launched_process.pid, signal.SIGKILL)