Esempio n. 1
0
    def run(self):
        queued_jobs = []
        active_jobs = []
        completed_jobs = {}
        failed_jobs = []
        warned_jobs = []

        cumulative_times = dict()
        start_times = dict()
        active_stages = dict()

        start_time = self.pre_start_time or time.time()
        last_update_time = time.time()

        # If the status rate is too low, just disable it
        if self.active_status_rate < 1E-3:
            self.show_active_status = False
        else:
            update_duration = 1.0 / self.active_status_rate

        # Disable the wide log padding if the status is disabled
        if not self.show_active_status:
            disable_wide_log()

        while True:
            # Check if we should stop
            if not self.keep_running:
                wide_log(
                    clr('[{}] An internal error occurred!').format(self.label))
                return

            # Write a continuously-updated status line
            if self.show_active_status:

                # Try to get an event from the queue (non-blocking)
                try:
                    event = self.event_queue.get(False)
                except Empty:
                    # Determine if the status should be shown based on the desired
                    # status rate
                    elapsed_time = time.time() - last_update_time
                    show_status_now = elapsed_time > update_duration

                    if show_status_now:
                        # Print live status (overwrites last line)
                        status_line = clr(
                            '[{} {} s] [{}/{} complete] [{}/{} jobs] [{} queued]'
                        ).format(
                            self.label,
                            format_time_delta_short(time.time() - start_time),
                            len(completed_jobs), len(self.jobs),
                            job_server.running_jobs(), job_server.max_jobs(),
                            len(queued_jobs) + len(active_jobs) -
                            len(active_stages))

                        # Show failed jobs
                        if len(failed_jobs) > 0:
                            status_line += clr(
                                ' [@!@{rf}{}@| @{rf}failed@|]').format(
                                    len(failed_jobs))

                        # Check load / mem
                        if not job_server.load_ok():
                            status_line += clr(' [@!@{rf}High Load@|]')
                        if not job_server.mem_ok():
                            status_line += clr(' [@!@{rf}Low Memory@|]')

                        # Add active jobs
                        if len(active_jobs) == 0:
                            status_line += clr(
                                ' @/@!@{kf}Waiting for jobs...@|')
                        else:
                            active_labels = []

                            for j, (s, t, p) in active_stages.items():
                                d = format_time_delta_short(
                                    cumulative_times[j] + time.time() - t)
                                if p == '':
                                    active_labels.append(
                                        clr('[{}:{} - {}]').format(j, s, d))
                                else:
                                    active_labels.append(
                                        clr('[{}:{} ({}%) - {}]').format(
                                            j, s, p, d))

                            status_line += ' ' + ' '.join(active_labels)

                        # Print the status line
                        # wide_log(status_line)
                        wide_log(status_line, rhs='', end='\r')
                        sys.stdout.flush()

                        # Store this update time
                        last_update_time = time.time()
                    else:
                        time.sleep(
                            max(0.0, min(update_duration - elapsed_time,
                                         0.01)))

                    # Only continue when no event was received
                    continue
            else:
                # Try to get an event from the queue (blocking)
                try:
                    event = self.event_queue.get(True)
                except Empty:
                    break

            # A `None` event is a signal to terminate
            if event is None:
                break

            # Handle the received events
            eid = event.event_id

            if 'JOB_STATUS' == eid:
                queued_jobs = event.data['queued']
                active_jobs = event.data['active']
                completed_jobs = event.data['completed']

                # Check if all jobs have finished in some way
                if all([
                        len(event.data[t]) == 0
                        for t in ['pending', 'queued', 'active']
                ]):
                    break

            elif 'STARTED_JOB' == eid:
                cumulative_times[event.data['job_id']] = 0.0
                wide_log(
                    clr('Starting >>> {:<{}}').format(event.data['job_id'],
                                                      self.max_jid_length))

            elif 'FINISHED_JOB' == eid:
                duration = format_time_delta(
                    cumulative_times[event.data['job_id']])

                if event.data['succeeded']:
                    wide_log(
                        clr('Finished <<< {:<{}} [ {} ]').format(
                            event.data['job_id'], self.max_jid_length,
                            duration))
                else:
                    failed_jobs.append(event.data['job_id'])
                    wide_log(
                        clr('Failed <<< {:<{}} [ {} ]').format(
                            event.data['job_id'], self.max_jid_length,
                            duration))

            elif 'ABANDONED_JOB' == eid:
                # Create a human-readable reason string
                if 'DEP_FAILED' == event.data['reason']:
                    direct = event.data['dep_job_id'] == event.data[
                        'direct_dep_job_id']
                    if direct:
                        reason = clr('Depends on failed job {}').format(
                            event.data['dep_job_id'])
                    else:
                        reason = clr('Depends on failed job {} via {}').format(
                            event.data['dep_job_id'],
                            event.data['direct_dep_job_id'])
                elif 'PEER_FAILED' == event.data['reason']:
                    reason = clr('Unrelated job failed')
                elif 'MISSING_DEPS' == event.data['reason']:
                    reason = clr('Depends on unknown jobs: {}').format(
                        ', '.join([
                            clr('@!{}@|').format(jid)
                            for jid in event.data['dep_ids']
                        ]))

                wide_log(
                    clr('Abandoned <<< {:<{}} [ {} ]').format(
                        event.data['job_id'], self.max_jid_length, reason))

            elif 'STARTED_STAGE' == eid:
                active_stages[event.data['job_id']] = [
                    event.data['stage_label'], event.time, ''
                ]
                start_times[event.data['job_id']] = event.time

                if self.show_stage_events:
                    wide_log(
                        clr('Starting >> {}:{}').format(
                            event.data['job_id'], event.data['stage_label']))

            elif 'STAGE_PROGRESS' == eid:
                active_stages[event.data['job_id']][2] = event.data['percent']

            elif 'SUBPROCESS' == eid:
                if self.show_stage_events:
                    wide_log(
                        clr('Subprocess > {}:{} `{}`').format(
                            event.data['job_id'], event.data['stage_label'],
                            event.data['stage_repro']))

            elif 'FINISHED_STAGE' == eid:
                # Get the stage duration
                duration = event.time - start_times[event.data['job_id']]
                cumulative_times[event.data['job_id']] += duration

                # This is no longer the active stage for this job
                del active_stages[event.data['job_id']]

                header_border = None
                header_border_file = sys.stdout
                header_title = None
                header_title_file = sys.stdout
                lines = []
                footer_title = None
                footer_title_file = sys.stdout
                footer_border = None
                footer_border_file = sys.stdout

                # Generate headers / borders for output
                if event.data['succeeded']:
                    footer_title = clr('Finished << {}:{}').format(
                        event.data['job_id'], event.data['stage_label'])

                    if len(event.data['stderr']) > 0:
                        # Mark that this job warned about something
                        if event.data['job_id'] not in warned_jobs:
                            warned_jobs.append(event.data['job_id'])

                        # Output contains warnings
                        header_border = clr('@!@{yf}' + '_' *
                                            (terminal_width() - 1) + '@|')
                        header_border_file = sys.stderr
                        header_title = clr('Warnings << {}:{} {}').format(
                            event.data['job_id'], event.data['stage_label'],
                            event.data['logfile_filename'])
                        header_title_file = sys.stderr
                        footer_border = clr('@{yf}' + '.' *
                                            (terminal_width() - 1) + '@|')
                        footer_border_file = sys.stderr
                    else:
                        # Normal output, no warnings
                        header_title = clr('Output << {}:{} {}').format(
                            event.data['job_id'], event.data['stage_label'],
                            event.data['logfile_filename'])

                    # Don't print footer title
                    if not self.show_stage_events:
                        footer_title = None
                else:
                    # Output contains errors
                    header_border = clr('@!@{rf}' + '_' *
                                        (terminal_width() - 1) + '@|')
                    header_border_file = sys.stderr
                    header_title = clr('Errors << {}:{} {}').format(
                        event.data['job_id'], event.data['stage_label'],
                        event.data['logfile_filename'])
                    header_title_file = sys.stderr
                    footer_border = clr('@{rf}' + '.' *
                                        (terminal_width() - 1) + '@|')
                    footer_border_file = sys.stderr

                    footer_title = clr(
                        'Failed << {}:{:<{}} [ Exited with code {} ]').format(
                            event.data['job_id'], event.data['stage_label'],
                            max(
                                0, self.max_jid_length -
                                len(event.data['job_id'])),
                            event.data['retcode'])
                    footer_title_file = sys.stderr

                lines_target = sys.stdout
                if self.show_buffered_stdout:
                    if len(event.data['interleaved']) > 0:
                        lines = [
                            line for line in
                            event.data['interleaved'].splitlines(True)
                            if (self.show_compact_io is False
                                or len(line.strip()) > 0)
                        ]
                    else:
                        header_border = None
                        header_title = None
                        footer_border = None
                elif self.show_buffered_stderr:
                    if len(event.data['stderr']) > 0:
                        lines = [
                            line
                            for line in event.data['stderr'].splitlines(True)
                            if (self.show_compact_io is False
                                or len(line.strip()) > 0)
                        ]
                        lines_target = sys.stderr
                    else:
                        header_border = None
                        header_title = None
                        footer_border = None

                if len(lines) > 0:
                    if self.show_repro_cmd:
                        if event.data['repro'] is not None:
                            lines.append(
                                clr('@!@{kf}{}@|\n').format(
                                    event.data['repro']))

                    # Print the output
                    if header_border:
                        wide_log(header_border, file=header_border_file)
                    if header_title:
                        wide_log(header_title, file=header_title_file)
                    if len(lines) > 0:
                        wide_log(''.join(lines), end='\r', file=lines_target)
                    if footer_border:
                        wide_log(footer_border, file=footer_border_file)
                    if footer_title:
                        wide_log(footer_title, file=footer_title_file)

            elif 'STDERR' == eid:
                if self.show_live_stderr and len(event.data['data']) > 0:
                    wide_log(self.format_interleaved_lines(event.data),
                             end='\r',
                             file=sys.stderr)

            elif 'STDOUT' == eid:
                if self.show_live_stdout and len(event.data['data']) > 0:
                    wide_log(self.format_interleaved_lines(event.data),
                             end='\r')

            elif 'MESSAGE' == eid:
                wide_log(event.data['msg'])

        # Print the full summary
        if self.show_full_summary:
            self.print_exec_summary(completed_jobs, warned_jobs, failed_jobs)

        # Print a compact summary
        if self.show_summary or self.show_full_summary:
            self.print_compact_summary(completed_jobs, warned_jobs,
                                       failed_jobs)

        # Print final runtime
        wide_log(
            clr('[{}] Runtime: {} total.').format(
                self.label, format_time_delta(time.time() - start_time)))
Esempio n. 2
0
    def run(self):
        queued_jobs = []
        active_jobs = []
        completed_jobs = {}
        failed_jobs = []
        warned_jobs = []

        cumulative_times = dict()
        start_times = dict()
        active_stages = dict()

        start_time = self.pre_start_time or time.time()
        last_update_time = time.time()

        # If the status rate is too low, just disable it
        if self.active_status_rate < 1E-3:
            self.show_active_status = False
        else:
            update_duration = 1.0 / self.active_status_rate

        # Disable the wide log padding if the status is disabled
        if not self.show_active_status:
            disable_wide_log()

        while True:
            # Check if we should stop
            if not self.keep_running:
                wide_log(clr('[{}] An internal error occurred!').format(self.label))
                return

            # Write a continuously-updated status line
            if self.show_active_status:

                # Try to get an event from the queue (non-blocking)
                try:
                    event = self.event_queue.get(False)
                except Empty:
                    # Determine if the status should be shown based on the desired
                    # status rate
                    elapsed_time = time.time() - last_update_time
                    show_status_now = elapsed_time > update_duration

                    if show_status_now:
                        # Print live status (overwrites last line)
                        status_line = clr('[{} {} s] [{}/{} complete] [{}/{} jobs] [{} queued]').format(
                            self.label,
                            format_time_delta_short(time.time() - start_time),
                            len(completed_jobs),
                            len(self.jobs),
                            job_server.running_jobs(),
                            job_server.max_jobs(),
                            len(queued_jobs) + len(active_jobs) - len(active_stages)
                        )

                        # Show failed jobs
                        if len(failed_jobs) > 0:
                            status_line += clr(' [@!@{rf}{}@| @{rf}failed@|]').format(len(failed_jobs))

                        # Check load / mem
                        if not job_server.load_ok():
                            status_line += clr(' [@!@{rf}High Load@|]')
                        if not job_server.mem_ok():
                            status_line += clr(' [@!@{rf}Low Memory@|]')

                        # Add active jobs
                        if len(active_jobs) == 0:
                            status_line += clr(' @/@!@{kf}Waiting for jobs...@|')
                        else:
                            active_labels = []

                            for j, (s, t, p) in active_stages.items():
                                d = format_time_delta_short(cumulative_times[j] + time.time() - t)
                                if p == '':
                                    active_labels.append(clr('[{}:{} - {}]').format(j, s, d))
                                else:
                                    active_labels.append(clr('[{}:{} ({}%) - {}]').format(j, s, p, d))

                            status_line += ' ' + ' '.join(active_labels)

                        # Print the status line
                        # wide_log(status_line)
                        wide_log(status_line, rhs='', end='\r')
                        sys.stdout.flush()

                        # Store this update time
                        last_update_time = time.time()
                    else:
                        time.sleep(max(0.0, min(update_duration - elapsed_time, 0.01)))

                    # Only continue when no event was received
                    continue
            else:
                # Try to get an event from the queue (blocking)
                try:
                    event = self.event_queue.get(True)
                except Empty:
                    break

            # A `None` event is a signal to terminate
            if event is None:
                break

            # Handle the received events
            eid = event.event_id

            if 'JOB_STATUS' == eid:
                queued_jobs = event.data['queued']
                active_jobs = event.data['active']
                completed_jobs = event.data['completed']

                # Check if all jobs have finished in some way
                if all([len(event.data[t]) == 0 for t in ['pending', 'queued', 'active']]):
                    break

            elif 'STARTED_JOB' == eid:
                cumulative_times[event.data['job_id']] = 0.0
                wide_log(clr('Starting >>> {:<{}}').format(
                    event.data['job_id'],
                    self.max_jid_length))

            elif 'FINISHED_JOB' == eid:
                duration = format_time_delta(cumulative_times[event.data['job_id']])

                if event.data['succeeded']:
                    wide_log(clr('Finished <<< {:<{}} [ {} ]').format(
                        event.data['job_id'],
                        self.max_jid_length,
                        duration))
                else:
                    failed_jobs.append(event.data['job_id'])
                    wide_log(clr('Failed <<< {:<{}} [ {} ]').format(
                        event.data['job_id'],
                        self.max_jid_length,
                        duration))

            elif 'ABANDONED_JOB' == eid:
                # Create a human-readable reason string
                if 'DEP_FAILED' == event.data['reason']:
                    direct = event.data['dep_job_id'] == event.data['direct_dep_job_id']
                    if direct:
                        reason = clr('Depends on failed job {}').format(event.data['dep_job_id'])
                    else:
                        reason = clr('Depends on failed job {} via {}').format(
                            event.data['dep_job_id'],
                            event.data['direct_dep_job_id'])
                elif 'PEER_FAILED' == event.data['reason']:
                    reason = clr('Unrelated job failed')
                elif 'MISSING_DEPS' == event.data['reason']:
                    reason = clr('Depends on unknown jobs: {}').format(
                        ', '.join([clr('@!{}@|').format(jid) for jid in event.data['dep_ids']]))

                wide_log(clr('Abandoned <<< {:<{}} [ {} ]').format(
                    event.data['job_id'],
                    self.max_jid_length,
                    reason))

            elif 'STARTED_STAGE' == eid:
                active_stages[event.data['job_id']] = [event.data['stage_label'], event.time, '']
                start_times[event.data['job_id']] = event.time

                if self.show_stage_events:
                    wide_log(clr('Starting >> {}:{}').format(
                        event.data['job_id'],
                        event.data['stage_label']))

            elif 'STAGE_PROGRESS' == eid:
                active_stages[event.data['job_id']][2] = event.data['percent']

            elif 'SUBPROCESS' == eid:
                if self.show_stage_events:
                    wide_log(clr('Subprocess > {}:{} `{}`').format(
                        event.data['job_id'],
                        event.data['stage_label'],
                        event.data['stage_repro']))

            elif 'FINISHED_STAGE' == eid:
                # Get the stage duration
                duration = event.time - start_times[event.data['job_id']]
                cumulative_times[event.data['job_id']] += duration

                # This is no longer the active stage for this job
                del active_stages[event.data['job_id']]

                header_border = None
                header_border_file = sys.stdout
                header_title = None
                header_title_file = sys.stdout
                lines = []
                footer_title = None
                footer_title_file = sys.stdout
                footer_border = None
                footer_border_file = sys.stdout

                # Generate headers / borders for output
                if event.data['succeeded']:
                    footer_title = clr(
                        'Finished << {}:{}').format(
                            event.data['job_id'],
                            event.data['stage_label'])

                    if len(event.data['stderr']) > 0:
                        # Mark that this job warned about something
                        if event.data['job_id'] not in warned_jobs:
                            warned_jobs.append(event.data['job_id'])

                        # Output contains warnings
                        header_border = clr('@!@{yf}' + '_' * (terminal_width() - 1) + '@|')
                        header_border_file = sys.stderr
                        header_title = clr(
                            'Warnings << {}:{} {}').format(
                                event.data['job_id'],
                                event.data['stage_label'],
                                event.data['logfile_filename'])
                        header_title_file = sys.stderr
                        footer_border = clr('@{yf}' + '.' * (terminal_width() - 1) + '@|')
                        footer_border_file = sys.stderr
                    else:
                        # Normal output, no warnings
                        header_title = clr(
                            'Output << {}:{} {}').format(
                                event.data['job_id'],
                                event.data['stage_label'],
                                event.data['logfile_filename'])

                    # Don't print footer title
                    if not self.show_stage_events:
                        footer_title = None
                else:
                    # Output contains errors
                    header_border = clr('@!@{rf}' + '_' * (terminal_width() - 1) + '@|')
                    header_border_file = sys.stderr
                    header_title = clr(
                        'Errors << {}:{} {}').format(
                            event.data['job_id'],
                            event.data['stage_label'],
                            event.data['logfile_filename'])
                    header_title_file = sys.stderr
                    footer_border = clr('@{rf}' + '.' * (terminal_width() - 1) + '@|')
                    footer_border_file = sys.stderr

                    footer_title = clr(
                        'Failed << {}:{:<{}} [ Exited with code {} ]').format(
                            event.data['job_id'],
                            event.data['stage_label'],
                            max(0, self.max_jid_length - len(event.data['job_id'])),
                            event.data['retcode'])
                    footer_title_file = sys.stderr

                lines_target = sys.stdout
                if self.show_buffered_stdout:
                    if len(event.data['interleaved']) > 0:
                        lines = [
                            l
                            for l in event.data['interleaved'].splitlines(True)
                            if (self.show_compact_io is False or len(l.strip()) > 0)
                        ]
                    else:
                        header_border = None
                        header_title = None
                        footer_border = None
                elif self.show_buffered_stderr:
                    if len(event.data['stderr']) > 0:
                        lines = [
                            l
                            for l in event.data['stderr'].splitlines(True)
                            if (self.show_compact_io is False or len(l.strip()) > 0)
                        ]
                        lines_target = sys.stderr
                    else:
                        header_border = None
                        header_title = None
                        footer_border = None

                if len(lines) > 0:
                    if self.show_repro_cmd:
                        if event.data['repro'] is not None:
                            lines.append(clr('@!@{kf}{}@|\n').format(event.data['repro']))

                    # Print the output
                    if header_border:
                        wide_log(header_border, file=header_border_file)
                    if header_title:
                        wide_log(header_title, file=header_title_file)
                    if len(lines) > 0:
                        wide_log(''.join(lines), end='\r', file=lines_target)
                    if footer_border:
                        wide_log(footer_border, file=footer_border_file)
                    if footer_title:
                        wide_log(footer_title, file=footer_title_file)

            elif 'STDERR' == eid:
                if self.show_live_stderr and len(event.data['data']) > 0:
                    wide_log(self.format_interleaved_lines(event.data), end='\r', file=sys.stderr)

            elif 'STDOUT' == eid:
                if self.show_live_stdout and len(event.data['data']) > 0:
                    wide_log(self.format_interleaved_lines(event.data), end='\r')

            elif 'MESSAGE' == eid:
                wide_log(event.data['msg'])

        # Print the full summary
        if self.show_full_summary:
            self.print_exec_summary(completed_jobs, warned_jobs, failed_jobs)

        # Print a compact summary
        if self.show_summary or self.show_full_summary:
            self.print_compact_summary(completed_jobs, warned_jobs, failed_jobs)

        # Print final runtime
        wide_log(clr('[{}] Runtime: {} total.').format(
            self.label,
            format_time_delta(time.time() - start_time)))