예제 #1
0
    def test_retrieve_progress_states_os_error_from_match_progress_update(
            self):
        """An OSError raised inside match_progress_update must surface to the
        caller that iterates retrieve_progress_states."""

        class FakeProgressWatcher(cp.ProgressWatcher):
            """Watcher fed from canned lines that fails once progress exists.

            The constructor is inherited unchanged from cp.ProgressWatcher.
            """

            def tail(self, sleep_time_ms):
                # Canned input lines standing in for the watched output.
                yield b'Stage One complete'
                yield b'progress: 25 Twenty-Five percent'
                yield b'Stage Two complete'

            def match_progress_update(self, input_data):
                # Delegate to the real matcher until current_progress()
                # reports a state, then simulate an out-of-memory failure.
                if self.current_progress() is not None:
                    raise OSError(errno.ENOMEM, 'No Memory')
                return super().match_progress_update(input_data)

        # Raw string so the regex escape \. is not parsed as an (invalid)
        # string escape sequence.
        regex = r'progress: ([0-9]*\.?[0-9]+) (.*)'
        counter = cp.ProgressSequenceCounter()
        watcher = FakeProgressWatcher('', '', counter, 1024, regex, Event(),
                                      Event(), Event())

        with self.assertRaises(OSError) as context:
            for progress in watcher.retrieve_progress_states():
                self.assertIsNotNone(progress)
        self.assertEqual('No Memory', context.exception.strerror)
예제 #2
0
    def test_collect_progress_updates_dev_null(self):
        """A watcher on a regular file reports the written progress update
        while a watcher on /dev/null yields no progress state at all."""
        file_name = tu.ensure_directory('build/collect_progress_test.' +
                                        tu.get_random_task_id())
        # Raw string: the pattern uses regex escapes (\^, \s, \.) that are
        # invalid as ordinary string escape sequences.
        progress_regex = r'\^\^\^\^JOB-PROGRESS:\s+([0-9]*\.?[0-9]+)($|\s+.*)'
        location = '/dev/null'
        stop = Event()
        completed = Event()
        termination = Event()

        progress_file = open(file_name, 'w+')
        progress_file.flush()
        counter = cp.ProgressSequenceCounter()
        dn_watcher = cp.ProgressWatcher(location, 'dn', counter, 1024,
                                        progress_regex, stop, completed,
                                        termination)
        out_watcher = cp.ProgressWatcher(file_name, 'so', counter, 1024,
                                         progress_regex, stop, completed,
                                         termination)

        def print_to_file():
            # Always close the file and signal completion, even if a write
            # fails, so the reading side cannot wait forever.
            try:
                with progress_file:
                    progress_file.write('Stage One complete\n')
                    progress_file.write('^^^^JOB-PROGRESS: 100 100-percent\n')
            finally:
                completed.set()

        print_thread = Thread(target=print_to_file, args=())

        try:
            print_thread.start()

            progress_states = [{
                'progress-message': b' 100-percent',
                'progress-percent': 100,
                'progress-sequence': 1
            }]
            for actual_progress_state in out_watcher.retrieve_progress_states(
            ):
                expected_progress_state = progress_states.pop(0)
                self.assertEqual(expected_progress_state,
                                 actual_progress_state)
                self.assertEqual(expected_progress_state,
                                 out_watcher.current_progress())
            self.assertFalse(progress_states)

            # The /dev/null watcher must be exhausted immediately.
            iterable = dn_watcher.retrieve_progress_states()
            exhausted = object()
            self.assertEqual(exhausted, next(iterable, exhausted))
            self.assertIsNone(dn_watcher.current_progress())
        finally:
            completed.set()
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if print_thread.is_alive():
                print_thread.join()
            tu.cleanup_file(file_name)
예제 #3
0
    def test_collect_progress_updates_faulty_regex(self):
        """With a permissive regex (\\S+ as the percent group), only the line
        carrying the value 75 is expected to be reported as progress; the
        lines with ABCDEF, F50 and the oversized number are not."""
        file_name = tu.ensure_directory(
            'build/collect_progress_updates_skip_faulty.' +
            tu.get_random_task_id())
        # Raw string: \^ and \S are regex escapes, not string escapes.
        progress_regex = r'\^\^\^\^JOB-PROGRESS: (\S+)(?: )?(.*)'
        stop = Event()
        completed = Event()
        termination = Event()

        progress_file = open(file_name, 'w+')
        progress_file.flush()
        counter = cp.ProgressSequenceCounter()
        watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024,
                                     progress_regex, stop, completed,
                                     termination)

        def print_to_file():
            # Always close the file and signal completion, even if a write
            # fails, so the reading side cannot wait forever.
            try:
                with progress_file:
                    progress_file.write(
                        '^^^^JOB-PROGRESS: ABCDEF string percent\n')
                    progress_file.write(
                        '^^^^JOB-PROGRESS: F50 Fifty percent\n')
                    progress_file.write(
                        '^^^^JOB-PROGRESS: 1019101010101010101010101018101101010101010110171010110 Sixty percent\n'
                    )
                    progress_file.write('^^^^JOB-PROGRESS: 75 75% percent\n')
            finally:
                completed.set()

        print_thread = Thread(target=print_to_file, args=())

        try:
            print_thread.start()

            progress_states = [{
                'progress-message': b'75% percent',
                'progress-percent': 75,
                'progress-sequence': 1
            }]
            for actual_progress_state in watcher.retrieve_progress_states():
                expected_progress_state = progress_states.pop(0)
                self.assertEqual(expected_progress_state,
                                 actual_progress_state)
                self.assertEqual(expected_progress_state,
                                 watcher.current_progress())
            self.assertFalse(progress_states)
        finally:
            completed.set()
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if print_thread.is_alive():
                print_thread.join()
            tu.cleanup_file(file_name)
예제 #4
0
    def test_collect_progress_updates_with_empty_regex(self):
        """With an empty progress regex no progress state is reported, even
        though the file contains JOB-PROGRESS lines."""
        file_name = tu.ensure_directory('build/collect_progress_test.' +
                                        tu.get_random_task_id())
        progress_regex = ''
        stop = Event()
        completed = Event()
        termination = Event()

        progress_file = open(file_name, 'w+')
        progress_file.flush()
        counter = cp.ProgressSequenceCounter()
        watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024,
                                     progress_regex, stop, completed,
                                     termination)

        def print_to_file():
            # Always close the file and signal completion, even if a write
            # fails, so the reading side cannot wait forever.
            try:
                with progress_file:
                    progress_file.write('Stage One complete\n')
                    progress_file.write(
                        '^^^^JOB-PROGRESS: 25 Twenty-Five percent\n')
                    progress_file.write('Stage Two complete\n')
                    progress_file.write(
                        '^^^^JOB-PROGRESS: 50 Fifty percent\n')
                    progress_file.write('Stage Three complete\n')
                    progress_file.write(
                        '^^^^JOB-PROGRESS: 55.0 Fifty-five percent\n')
                    progress_file.write('Stage Four complete\n')
                    progress_file.write('^^^^JOB-PROGRESS: 100 100-percent\n')
            finally:
                completed.set()

        print_thread = Thread(target=print_to_file, args=())

        try:
            print_thread.start()

            # Any reported state is a failure; compare against an empty list
            # so a violation shows up as an assertion, not an IndexError.
            collected_states = list(watcher.retrieve_progress_states())
            self.assertEqual([], collected_states)
            self.assertIsNone(watcher.current_progress())
        finally:
            completed.set()
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if print_thread.is_alive():
                print_thread.join()
            tu.cleanup_file(file_name)
예제 #5
0
    def test_watcher_tail_with_read_limit(self):
        """With max_bytes_read_per_line set to 10, tail() is expected to
        yield lines longer than 10 bytes in pieces of at most 10 bytes."""
        file_name = tu.ensure_directory('build/tail_progress_test.' +
                                        tu.get_random_task_id())
        stop = Event()
        completed = Event()
        termination = Event()
        tail_sleep_ms = 25

        def write_to_file():
            # Signal completion even if writing fails so tail() can finish.
            try:
                with open(file_name, 'w+') as output_file:
                    output_file.write('abcd\n')
                    output_file.flush()

                    output_file.write('abcdefghijkl\n')
                    output_file.flush()

                    output_file.write('abcdefghijklmnopqrstuvwxyz\n')
                    output_file.flush()
                time.sleep(0.15)
            finally:
                completed.set()

        write_thread = Thread(target=write_to_file, args=())

        try:
            write_thread.start()

            counter = cp.ProgressSequenceCounter()
            watcher = cp.ProgressWatcher(file_name, 'test', counter, 10, '',
                                         stop, completed, termination)
            collected_data = [
                line.strip() for line in watcher.tail(tail_sleep_ms)
            ]

            logging.debug('collected_data = %s', collected_data)
            expected_data = [
                b'abcd', b'abcdefghij', b'kl', b'abcdefghij', b'klmnopqrst',
                b'uvwxyz'
            ]
            self.assertEqual(expected_data, collected_data)
        finally:
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if write_thread.is_alive():
                write_thread.join()
            tu.cleanup_file(file_name)
예제 #6
0
    def test_watcher_tail_lot_of_writes(self):
        """tail() must return every one of 250000 written lines, in order."""
        file_name = tu.ensure_directory('build/tail_progress_test.' +
                                        tu.get_random_task_id())
        items_to_write = 250000
        stop = Event()
        completed = Event()
        termination = Event()
        tail_sleep_ms = 25

        def write_to_file():
            # Signal completion even if writing fails so tail() can finish.
            try:
                with open(file_name, 'w+') as output_file:
                    for item in range(items_to_write):
                        output_file.write('line-{}\n'.format(item))
                        # Flush periodically so the reader sees partial data.
                        if item % 100 == 0:
                            output_file.flush()
                time.sleep(0.15)
            finally:
                completed.set()

        write_thread = Thread(target=write_to_file, args=())

        try:
            write_thread.start()

            counter = cp.ProgressSequenceCounter()
            watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024, '',
                                         stop, completed, termination)
            collected_data = [
                line.strip() for line in watcher.tail(tail_sleep_ms)
            ]

            logging.info('Items read: %s', len(collected_data))
            # Dump what was read to help diagnose a count mismatch.
            if items_to_write != len(collected_data):
                for index, line in enumerate(collected_data):
                    logging.info('%s: %s', index, line)
            self.assertEqual(items_to_write, len(collected_data))
            expected_data = [
                'line-{}'.format(item).encode()
                for item in range(items_to_write)
            ]
            self.assertEqual(expected_data, collected_data)
        finally:
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if write_thread.is_alive():
                write_thread.join()
            tu.cleanup_file(file_name)
예제 #7
0
    def test_watcher_tail(self):
        """tail() must return each line written by a slow writer, in order."""
        file_name = tu.ensure_directory('build/tail_progress_test.' +
                                        tu.get_random_task_id())
        items_to_write = 12
        stop = Event()
        completed = Event()
        termination = Event()
        write_sleep_ms = 50
        tail_sleep_ms = 25

        def write_to_file():
            # Signal completion even if writing fails so tail() can finish.
            try:
                with open(file_name, 'w+') as output_file:
                    for item in range(items_to_write):
                        time.sleep(write_sleep_ms / 1000.0)
                        output_file.write('{}\n'.format(item))
                        output_file.flush()
                time.sleep(0.15)
            finally:
                completed.set()

        write_thread = Thread(target=write_to_file, args=())

        try:
            write_thread.start()

            counter = cp.ProgressSequenceCounter()
            watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024, '',
                                         stop, completed, termination)
            collected_data = [
                line.strip() for line in watcher.tail(tail_sleep_ms)
            ]

            self.assertEqual(items_to_write, len(collected_data))
            self.assertEqual(
                [str(item).encode() for item in range(items_to_write)],
                collected_data)
        finally:
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if write_thread.is_alive():
                write_thread.join()
            tu.cleanup_file(file_name)
예제 #8
0
def manage_task(driver, task, stop_signal, completed_signal, config):
    """Manages the execution of a task, waiting for it to terminate normally or be killed.

    It sends the task status updates, the sandbox location and the exit code back to the scheduler.
    Progress trackers are started for the configured progress file, stderr and stdout, and their
    progress updates are also sent to the scheduler.
    Setting the stop_signal will trigger termination of the task and associated cleanup.

    Parameters
    ----------
    driver
        Handle passed to StatusUpdater and send_message for reporting to the scheduler.
    task
        The task to run; passed to get_task_id, retrieve_process_environment and launch_task.
    stop_signal
        Event that requests termination. When it is set, the function does not
        wait for the progress trackers to finish.
    completed_signal
        Event that is always set before this function returns, so the driver can stop.
    config
        Executor configuration. The attributes read here are mesos_directory,
        max_message_length, progress_sample_interval_ms, progress_output_name,
        stderr_file(), stdout_file() and shutdown_grace_period_ms.

    Returns
    -------
    Nothing
    """
    launched_process = None
    task_id = get_task_id(task)
    cio.print_and_log('Starting task {}'.format(task_id))
    status_updater = StatusUpdater(driver, task_id)

    inner_os_error_handler = functools.partial(os_error_handler, stop_signal,
                                               status_updater)
    try:
        # not yet started to run the task
        status_updater.update_status(cook.TASK_STARTING)

        # Use MESOS_DIRECTORY instead of MESOS_SANDBOX, to report the sandbox location outside of the container
        sandbox_message = {
            'sandbox-directory': config.mesos_directory,
            'task-id': task_id,
            'type': 'directory'
        }
        send_message(driver, inner_os_error_handler, sandbox_message)

        environment = retrieve_process_environment(config, task, os.environ)
        launched_process = launch_task(task, environment)
        if not launched_process:
            # task launch failed, report an error
            logging.error('Error in launching task')
            status_updater.update_status(cook.TASK_ERROR,
                                         reason=cook.REASON_TASK_INVALID)
            return

        # task has begun running successfully
        status_updater.update_status(cook.TASK_RUNNING)
        cio.print_and_log('Forked command at {}'.format(launched_process.pid))

        # event to track task execution completion
        task_completed_signal = Event()
        sequence_counter = cp.ProgressSequenceCounter()

        send_progress_message = functools.partial(send_message, driver,
                                                  inner_os_error_handler)
        max_message_length = config.max_message_length
        sample_interval_ms = config.progress_sample_interval_ms
        progress_updater = cp.ProgressUpdater(task_id, max_message_length,
                                              sample_interval_ms,
                                              send_progress_message)
        progress_termination_signal = Event()

        def launch_progress_tracker(progress_location, location_tag):
            """Create and start a progress tracker for a single location."""
            logging.info('Location {} tagged as [tag={}]'.format(
                progress_location, location_tag))
            progress_tracker = cp.ProgressTracker(
                config, stop_signal, task_completed_signal, sequence_counter,
                progress_updater, progress_termination_signal,
                progress_location, location_tag, inner_os_error_handler)
            progress_tracker.start()
            return progress_tracker

        # Keyed by location, so identical locations are only tracked once
        # (the tag of the last duplicate entry wins).
        progress_locations = {
            config.progress_output_name: 'progress',
            config.stderr_file(): 'stderr',
            config.stdout_file(): 'stdout'
        }
        logging.info('Progress will be tracked from {} locations'.format(
            len(progress_locations)))
        progress_trackers = [
            launch_progress_tracker(location, tag)
            for location, tag in progress_locations.items()
        ]

        await_process_completion(launched_process, stop_signal,
                                 config.shutdown_grace_period_ms)
        task_completed_signal.set()

        # Set progress_termination_signal once the shutdown grace period has
        # elapsed after task completion. The timer is a daemon thread so it
        # does not keep the process alive.
        progress_termination_timer = Timer(
            config.shutdown_grace_period_ms / 1000.0,
            progress_termination_signal.set)
        progress_termination_timer.daemon = True
        progress_termination_timer.start()

        # propagate the exit code
        exit_code = launched_process.returncode
        cio.print_and_log('Command exited with status {} (pid: {})'.format(
            exit_code, launched_process.pid))

        exit_message = {'exit-code': exit_code, 'task-id': task_id}
        send_message(driver, inner_os_error_handler, exit_message)

        # await progress updater termination if executor is terminating normally
        if not stop_signal.is_set():
            logging.info('Awaiting completion of progress updaters')
            for progress_tracker in progress_trackers:
                progress_tracker.wait()
            logging.info('Progress updaters completed')

        # force send the latest progress state if available
        for progress_tracker in progress_trackers:
            progress_tracker.force_send_progress_update()

        # task either completed successfully or aborted with an error
        task_state = get_task_state(exit_code)
        output_task_completion(task_id, task_state)
        status_updater.update_status(task_state)

    except Exception as exception:
        if cu.is_out_of_memory_error(exception):
            inner_os_error_handler(exception)
        else:
            # task aborted with an error
            logging.exception('Error in executing task')
            output_task_completion(task_id, cook.TASK_FAILED)
            status_updater.update_status(
                cook.TASK_FAILED, reason=cook.REASON_EXECUTOR_TERMINATED)

    finally:
        # ensure completed_signal is set so driver can stop
        completed_signal.set()
        # make sure the launched process does not outlive the executor
        if launched_process and cs.is_process_running(launched_process):
            cs.send_signal(launched_process.pid, signal.SIGKILL)
예제 #9
0
    def test_collect_progress_updates_lots_of_writes(self):
        """All 100 progress updates interleaved with 250000 ordinary lines
        must be reported in order, with matching sequence numbers."""
        file_name = tu.ensure_directory('build/collect_progress_test.' +
                                        tu.get_random_task_id())
        # Raw string: \. is a regex escape, not a valid string escape.
        progress_regex = r'progress: ([0-9]*\.?[0-9]+), (.*)'
        items_to_write = 250000
        stop = Event()
        completed = Event()
        termination = Event()

        def write_to_file():
            unit_progress_granularity = items_to_write // 100
            with open(file_name, 'w+') as target_file:
                for item in range(items_to_write):
                    # Emit a progress line each time another 1% of the items
                    # is about to be completed.
                    progress_percent, remainder = divmod(
                        item + 1, unit_progress_granularity)
                    if remainder == 0:
                        target_file.write(
                            'progress: {0}, completed-{0}-percent\n'.format(
                                progress_percent))
                        target_file.flush()
                    target_file.write('{}\n'.format(item))
            time.sleep(0.15)

        write_thread = Thread(target=write_to_file, args=())
        write_thread.daemon = True
        write_thread.start()

        counter = cp.ProgressSequenceCounter()
        watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024,
                                     progress_regex, stop, completed,
                                     termination)

        try:
            progress_states = [{
                'progress-message':
                'completed-{}-percent'.format(percent).encode(),
                'progress-percent': percent,
                'progress-sequence': percent
            } for percent in range(1, 101)]
            for actual_progress_state in watcher.retrieve_progress_states():
                expected_progress_state = progress_states.pop(0)
                self.assertEqual(expected_progress_state,
                                 actual_progress_state)
                self.assertEqual(expected_progress_state,
                                 watcher.current_progress())
                # The writer never sets completed; stop once all expected
                # states have been seen.
                if not progress_states:
                    completed.set()
            self.assertFalse(progress_states)
        finally:
            completed.set()
            # Wait for the writer before deleting the file it writes to.
            if write_thread.is_alive():
                write_thread.join()
            tu.cleanup_file(file_name)
예제 #10
0
    def test_progress_updates_early_termination(self):
        """Progress updates written after the termination signal is set are
        not expected to be reported; only the 25 and 50 updates are."""
        file_name = tu.ensure_directory('build/collect_progress_test.' +
                                        tu.get_random_task_id())
        # Raw string: the pattern uses regex escapes (\^, \s, \.) that are
        # invalid as ordinary string escape sequences.
        progress_regex = r'\^\^\^\^JOB-PROGRESS:\s+([0-9]*\.?[0-9]+)($|\s+.*)'
        stop = Event()
        completed = Event()
        termination = Event()
        termination_trigger = Event()

        progress_file = open(file_name, 'w+')
        progress_file.flush()
        counter = cp.ProgressSequenceCounter()
        watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024,
                                     progress_regex, stop, completed,
                                     termination)

        def print_to_file():
            # Always close the file and signal completion, even if a write
            # fails, so the reading side cannot wait forever.
            try:
                with progress_file:
                    progress_file.write('Stage One complete\n')
                    progress_file.write('^^^^JOB-PROGRESS: 25 Twenty-Five\n')
                    progress_file.write('^^^^JOB-PROGRESS: 50 Fifty\n')
                    progress_file.flush()

                    # Hold back the remaining lines until the reader has seen
                    # the 50% update, then request termination first.
                    logging.info('Awaiting termination_trigger')
                    termination_trigger.wait()
                    logging.info('termination_trigger has been set')
                    termination.set()

                    progress_file.write('Stage Three complete\n')
                    progress_file.write('^^^^JOB-PROGRESS: 55 Fifty-five\n')
                    progress_file.write('Stage Four complete\n')
                    progress_file.write('^^^^JOB-PROGRESS: 100 Hundred\n')
            finally:
                completed.set()

        print_thread = Thread(target=print_to_file, args=())
        print_thread.daemon = True

        try:
            print_thread.start()

            progress_states = [{
                'progress-message': b' Twenty-Five',
                'progress-percent': 25,
                'progress-sequence': 1
            }, {
                'progress-message': b' Fifty',
                'progress-percent': 50,
                'progress-sequence': 2
            }]
            for actual_progress_state in watcher.retrieve_progress_states():
                expected_progress_state = progress_states.pop(0)
                self.assertEqual(expected_progress_state,
                                 actual_progress_state)
                self.assertEqual(expected_progress_state,
                                 watcher.current_progress())
                if expected_progress_state['progress-percent'] == 50:
                    termination_trigger.set()
            self.assertFalse(progress_states)
        finally:
            # Release the writer in case the test failed before the 50%
            # update, so it is never left blocked on the trigger.
            termination_trigger.set()
            completed.set()
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if print_thread.is_alive():
                print_thread.join()
            tu.cleanup_file(file_name)
예제 #11
0
    def test_collect_progress_updates_two_capture_groups(self):
        """A regex with percent and message capture groups yields one state
        per accepted update: 55.0 is reported as 55, 65.8 as 66, and the
        100.1 update is not expected to be reported."""
        file_name = tu.ensure_directory('build/collect_progress_test.' +
                                        tu.get_random_task_id())
        # Raw string: the pattern uses regex escapes (\^, \s, \.) that are
        # invalid as ordinary string escape sequences.
        progress_regex = r'\^\^\^\^JOB-PROGRESS:\s+([0-9]*\.?[0-9]+)($|\s+.*)'
        stop = Event()
        completed = Event()
        termination = Event()

        progress_file = open(file_name, 'w+')
        progress_file.flush()
        counter = cp.ProgressSequenceCounter()
        watcher = cp.ProgressWatcher(file_name, 'test', counter, 1024,
                                     progress_regex, stop, completed,
                                     termination)

        def print_to_file():
            # The context manager closes the file even if a write fails.
            with progress_file:
                progress_file.write('Stage One complete\n')
                progress_file.write('^^^^JOB-PROGRESS: 25 Twenty-Five\n')
                progress_file.write('^^^^JOB-PROGRESS: 50 Fifty\n')
                progress_file.write('Stage Three complete\n')
                progress_file.write('^^^^JOB-PROGRESS: 55.0 Fifty-five\n')
                progress_file.write('^^^^JOB-PROGRESS: 65.8 Sixty-six\n')
                progress_file.write('Stage Four complete\n')
                progress_file.write('^^^^JOB-PROGRESS: 100 Hundred\n')
                progress_file.write('^^^^JOB-PROGRESS: 100.1 Over a hundred\n')

        print_thread = Thread(target=print_to_file, args=())

        try:
            print_thread.start()

            progress_states = [{
                'progress-message': b' Twenty-Five',
                'progress-percent': 25,
                'progress-sequence': 1
            }, {
                'progress-message': b' Fifty',
                'progress-percent': 50,
                'progress-sequence': 2
            }, {
                'progress-message': b' Fifty-five',
                'progress-percent': 55,
                'progress-sequence': 3
            }, {
                'progress-message': b' Sixty-six',
                'progress-percent': 66,
                'progress-sequence': 4
            }, {
                'progress-message': b' Hundred',
                'progress-percent': 100,
                'progress-sequence': 5
            }]
            for actual_progress_state in watcher.retrieve_progress_states():
                expected_progress_state = progress_states.pop(0)
                self.assertEqual(expected_progress_state,
                                 actual_progress_state)
                self.assertEqual(expected_progress_state,
                                 watcher.current_progress())
                # The writer never sets completed; stop once all expected
                # states have been seen.
                if not progress_states:
                    completed.set()
            self.assertFalse(progress_states)
        finally:
            completed.set()
            # Wait for the writer before deleting the file it writes to; the
            # is_alive() guard also covers a thread that never started.
            if print_thread.is_alive():
                print_thread.join()
            tu.cleanup_file(file_name)