Example #1
0
def build_email_body(ctx, stack, sleep_time_sec):
    """Build the subject and body of the sleep-before-teardown email.

    The body is rendered from ``templates/email-sleep-before-teardown.jinja2``
    located next to this module.

    :param ctx:            job context; its ``config``, ``summary``, ``owner``
                           and ``name`` attributes are read
    :param stack:          iterable of ``(task_name, manager)`` pairs; only
                           the task names are used
    :param sleep_time_sec: sleep duration in seconds
    :returns:              a ``(subject, body)`` tuple, both stripped of
                           surrounding whitespace
    """
    # os.path.join keeps the path relative when dirname(__file__) is empty;
    # plain concatenation would produce the absolute path '/templates/...'.
    email_template_path = os.path.join(
        os.path.dirname(__file__),
        'templates',
        'email-sleep-before-teardown.jinja2',
    )

    with open(email_template_path) as f:
        template_text = f.read()

    email_template = jinja2.Template(template_text)
    archive_path = ctx.config.get('archive_path')
    job_id = ctx.config.get('job_id')
    status = get_status(ctx.summary)
    stack_path = '/'.join(task for task, _ in stack)
    suite_name = ctx.config.get('suite')
    # The timestamp is taken now and formatted in UTC (time.gmtime).
    sleep_date = time.time()
    sleep_date_str = time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.gmtime(sleep_date))

    body = email_template.render(
        sleep_time=format_timespan(sleep_time_sec),
        sleep_time_sec=sleep_time_sec,
        sleep_date=sleep_date_str,
        owner=ctx.owner,
        run_name=ctx.name,
        job_id=job_id,
        job_info=get_results_url(ctx.name),
        job_logs=get_http_log_path(archive_path, job_id),
        suite_name=suite_name,
        status=status,
        task_stack=stack_path,
    )
    subject = ('teuthology job {run}/{job} has fallen asleep at {date}'.format(
        run=ctx.name, job=job_id, date=sleep_date_str))
    return (subject.strip(), body.strip())
Example #2
0
def build_rocketchat_message(ctx, stack, sleep_time_sec, template_path=None):
    """Render the sleep-before-teardown notification message.

    :param ctx:            job context; its ``config``, ``summary``, ``owner``
                           and ``name`` attributes are read
    :param stack:          iterable of ``(task_name, manager)`` pairs; only
                           the task names are used
    :param sleep_time_sec: sleep duration in seconds
    :param template_path:  optional path of the jinja2 template to render;
                           when falsy, the bundled
                           ``templates/rocketchat-sleep-before-teardown.jinja2``
                           next to this module is used
    :returns:              the rendered message
    """
    # os.path.join keeps the default path relative when dirname(__file__) is
    # empty; plain concatenation would produce '/templates/...'.
    message_template_path = template_path or os.path.join(
        os.path.dirname(__file__),
        'templates',
        'rocketchat-sleep-before-teardown.jinja2',
    )

    with open(message_template_path) as f:
        template_text = f.read()

    template = jinja2.Template(template_text)
    archive_path = ctx.config.get('archive_path')
    job_id = ctx.config.get('job_id')
    status = get_status(ctx.summary)
    stack_path = ' -> '.join(task for task, _ in stack)
    suite_name = ctx.config.get('suite')
    # The timestamp is taken now and formatted in UTC (time.gmtime).
    sleep_date = time.time()
    sleep_date_str = time.strftime('%Y-%m-%d %H:%M:%S',
                                   time.gmtime(sleep_date))

    message = template.render(
        sleep_time=format_timespan(sleep_time_sec),
        sleep_time_sec=sleep_time_sec,
        sleep_date=sleep_date_str,
        owner=ctx.owner,
        run_name=ctx.name,
        job_id=job_id,
        job_desc=ctx.config.get('description'),
        job_info=get_results_url(ctx.name, job_id),
        job_logs=get_http_log_path(archive_path, job_id),
        suite_name=suite_name,
        status=status,
        task_stack=stack_path,
    )
    return message
Example #3
0
def test_get_http_log_path():
    """Check ``misc.get_http_log_path`` against two archive-server setups.

    Each scenario sets ``config.archive_server`` once and then verifies the
    generated URL for calls made with and without a job id.
    """
    scenarios = (
        # Fake configuration
        (
            "http://example.com/server_root",
            "/var/www/archives",
            (
                ((), "http://example.com/server_root/archives/"),
                (('12345',),
                 "http://example.com/server_root/archives/12345/"),
            ),
        ),
        # Inktank configuration
        (
            "http://qa-proxy.ceph.com/teuthology/",
            "/var/lib/teuthworker/archive/teuthology-2013-09-12_11:49:50-ceph-deploy-master-testing-basic-vps",
            (
                ((31087,),
                 "http://qa-proxy.ceph.com/teuthology/teuthology-2013-09-12_11:49:50-ceph-deploy-master-testing-basic-vps/31087/"),
                ((),
                 "http://qa-proxy.ceph.com/teuthology/teuthology-2013-09-12_11:49:50-ceph-deploy-master-testing-basic-vps/"),
            ),
        ),
    )

    for server_url, archive_root, checks in scenarios:
        config.archive_server = server_url
        for job_args, expected_url in checks:
            # job_args is empty when no job id is passed
            path = misc.get_http_log_path(archive_root, *job_args)
            assert path == expected_url
Example #4
0
def build_email_body(name, archive_dir, timeout):
    """Build the subject and body of the results email for a run.

    Every job returned by ``get_jobs(archive_dir)`` is sorted into one of
    three groups: hung (no ``summary.yaml`` in its directory), passed
    (``summary['success']`` is truthy) or failed. Each group is rendered
    with the matching entries of ``email_templates``.

    :param name:        run name; used in the body and at the end of the
                        subject
    :param archive_dir: directory containing one sub-directory per job
    :param timeout:     not used by this function
    :returns:           a ``(subject, body)`` tuple, both stripped of
                        surrounding whitespace
    """
    failed = {}
    hung = {}
    passed = {}

    for job in get_jobs(archive_dir):
        job_dir = os.path.join(archive_dir, job)
        summary_file = os.path.join(job_dir, 'summary.yaml')

        # Unfinished jobs will have no summary.yaml
        if not os.path.exists(summary_file):
            info_file = os.path.join(job_dir, 'info.yaml')

            desc = ''
            if os.path.exists(info_file):
                # open() works on Python 2 and 3; the file() builtin was
                # removed in Python 3.
                with open(info_file) as f:
                    info = yaml.safe_load(f)
                    desc = info['description']

            hung[job] = email_templates['hung_templ'].format(
                job_id=job,
                desc=desc,
            )
            continue

        with open(summary_file) as f:
            summary = yaml.safe_load(f)

        if summary['success']:
            passed[job] = email_templates['pass_templ'].format(
                job_id=job,
                desc=summary.get('description'),
                time=int(summary.get('duration', 0)),
            )
        else:
            log = misc.get_http_log_path(archive_dir, job)
            if log:
                log_line = email_templates['fail_log_templ'].format(log=log)
            else:
                log_line = ''
            sentry_events = summary.get('sentry_events')
            if sentry_events:
                sentry_line = email_templates['fail_sentry_templ'].format(
                    sentries='\n        '.join(sentry_events))
            else:
                sentry_line = ''

            # 'fill' is from the textwrap module and it collapses a given
            # string into multiple lines of a maximum width as specified. We
            # want 75 characters here so that when we indent by 4 on the next
            # line, we have 79-character exception paragraphs.
            # A failed job may lack 'failure_reason'; fill() cannot take
            # None, so fall back to an empty reason instead of crashing.
            reason = fill(summary.get('failure_reason') or '', 75)
            reason = '\n'.join(('    ') + line for line in reason.splitlines())

            failed[job] = email_templates['fail_templ'].format(
                job_id=job,
                desc=summary.get('description'),
                time=int(summary.get('duration', 0)),
                reason=reason,
                log_line=log_line,
                sentry_line=sentry_line,
            )

    # Separator after a subject fragment: a comma only if more groups follow.
    maybe_comma = lambda s: ', ' if s else ' '

    subject = ''
    fail_sect = ''
    hung_sect = ''
    pass_sect = ''
    if failed:
        subject += '{num_failed} failed{sep}'.format(
            num_failed=len(failed),
            sep=maybe_comma(hung or passed)
        )
        fail_sect = email_templates['sect_templ'].format(
            title='Failed',
            jobs=''.join(failed.values())
        )
    if hung:
        subject += '{num_hung} hung{sep}'.format(
            num_hung=len(hung),
            sep=maybe_comma(passed),
        )
        hung_sect = email_templates['sect_templ'].format(
            title='Hung',
            jobs=''.join(hung.values()),
        )
    if passed:
        subject += '%s passed ' % len(passed)
        pass_sect = email_templates['sect_templ'].format(
            title='Passed',
            jobs=''.join(passed.values()),
        )

    body = email_templates['body_templ'].format(
        name=name,
        log_root=misc.get_http_log_path(archive_dir),
        fail_count=len(failed),
        hung_count=len(hung),
        pass_count=len(passed),
        fail_sect=fail_sect,
        hung_sect=hung_sect,
        pass_sect=pass_sect,
    )

    subject += 'in {suite}'.format(suite=name)
    return (subject.strip(), body.strip())
Example #5
0
def build_email_body(name, archive_dir, timeout):
    """Build the subject and body of the results email for a run.

    Every job returned by ``get_jobs(archive_dir)`` is sorted into one of
    three groups: hung (no ``summary.yaml`` in its directory), passed
    (``summary['success']`` is truthy) or failed. Each group is rendered
    with the matching entries of ``email_templates``.

    :param name:        run name; used in the body and at the end of the
                        subject
    :param archive_dir: directory containing one sub-directory per job
    :param timeout:     not used by this function
    :returns:           a ``(subject, body)`` tuple, both stripped of
                        surrounding whitespace
    """
    failed = {}
    hung = {}
    passed = {}

    for job in get_jobs(archive_dir):
        job_dir = os.path.join(archive_dir, job)
        summary_file = os.path.join(job_dir, 'summary.yaml')

        # Unfinished jobs will have no summary.yaml
        if not os.path.exists(summary_file):
            info_file = os.path.join(job_dir, 'info.yaml')

            desc = ''
            if os.path.exists(info_file):
                # open() works on Python 2 and 3; the file() builtin was
                # removed in Python 3.
                with open(info_file) as f:
                    info = yaml.safe_load(f)
                    desc = info['description']

            hung[job] = email_templates['hung_templ'].format(
                job_id=job,
                desc=desc,
            )
            continue

        with open(summary_file) as f:
            summary = yaml.safe_load(f)

        if summary['success']:
            passed[job] = email_templates['pass_templ'].format(
                job_id=job,
                desc=summary.get('description'),
                time=int(summary.get('duration', 0)),
            )
        else:
            log = misc.get_http_log_path(archive_dir, job)
            if log:
                log_line = email_templates['fail_log_templ'].format(log=log)
            else:
                log_line = ''
            sentry_events = summary.get('sentry_events')
            if sentry_events:
                sentry_line = email_templates['fail_sentry_templ'].format(
                    sentries='\n        '.join(sentry_events))
            else:
                sentry_line = ''

            # 'fill' is from the textwrap module and it collapses a given
            # string into multiple lines of a maximum width as specified. We
            # want 75 characters here so that when we indent by 4 on the next
            # line, we have 79-character exception paragraphs.
            # A failed job may lack 'failure_reason'; fill() cannot take
            # None, so fall back to an empty reason instead of crashing.
            reason = fill(summary.get('failure_reason') or '', 75)
            reason = '\n'.join(('    ') + line for line in reason.splitlines())

            failed[job] = email_templates['fail_templ'].format(
                job_id=job,
                desc=summary.get('description'),
                time=int(summary.get('duration', 0)),
                reason=reason,
                log_line=log_line,
                sentry_line=sentry_line,
            )

    # Separator after a subject fragment: a comma only if more groups follow.
    maybe_comma = lambda s: ', ' if s else ' '

    subject = ''
    fail_sect = ''
    hung_sect = ''
    pass_sect = ''
    if failed:
        subject += '{num_failed} failed{sep}'.format(
            num_failed=len(failed),
            sep=maybe_comma(hung or passed),
        )
        fail_sect = email_templates['sect_templ'].format(
            title='Failed',
            jobs=''.join(failed.values()),
        )
    if hung:
        subject += '{num_hung} hung{sep}'.format(
            num_hung=len(hung),
            sep=maybe_comma(passed),
        )
        hung_sect = email_templates['sect_templ'].format(
            title='Hung',
            jobs=''.join(hung.values()),
        )
    if passed:
        subject += '%s passed ' % len(passed)
        pass_sect = email_templates['sect_templ'].format(
            title='Passed',
            jobs=''.join(passed.values()),
        )

    body = email_templates['body_templ'].format(
        name=name,
        log_root=misc.get_http_log_path(archive_dir),
        fail_count=len(failed),
        hung_count=len(hung),
        pass_count=len(passed),
        fail_sect=fail_sect,
        hung_sect=hung_sect,
        pass_sect=pass_sect,
    )

    subject += 'in {suite}'.format(suite=name)
    return (subject.strip(), body.strip())
Example #6
0
def run_tasks(tasks, ctx):
    """Run each task in order, then unwind the entered managers in reverse.

    Each entry of ``tasks`` must be a single-item mapping of
    ``{taskname: config}``. The object returned by ``run_one_task`` is
    entered and pushed on a stack when it has an ``__enter__`` attribute.
    If anything raises while tasks are being started, the failure is
    recorded in ``ctx.summary``, optionally reported to Sentry, and
    optionally followed by an interactive session. Afterwards every manager
    on the stack is exited in reverse order.

    :param tasks: iterable of single-item ``{taskname: config}`` mappings
    :param ctx:   job context; ``config``, ``summary`` and ``owner`` are read
                  and ``summary`` is updated on failure
    :raises SystemExit: with code 1 when exception info is still set after
                        all managers have been unwound
    """
    archive_path = ctx.config.get('archive_path')
    if archive_path:
        timer = Timer(
            path=os.path.join(archive_path, 'timing.yaml'),
            sync=True,
        )
    else:
        timer = Timer()
    stack = []
    # Bound up front so the error handler can refer to it even when the very
    # first task definition is invalid.
    taskname = None
    try:
        for taskdict in tasks:
            try:
                ((taskname, config), ) = taskdict.items()
            except (ValueError, AttributeError):
                raise RuntimeError('Invalid task definition: %s' % taskdict)
            log.info('Running task %s...', taskname)
            timer.mark('%s enter' % taskname)
            manager = run_one_task(taskname, ctx=ctx, config=config)
            if hasattr(manager, '__enter__'):
                stack.append((taskname, manager))
                manager.__enter__()
    except BaseException as e:
        if isinstance(e, ConnectionLostError):
            # Prevent connection issues being flagged as failures
            set_status(ctx.summary, 'dead')
        else:
            # the status may have been set to dead, leave it as-is if so
            if not ctx.summary.get('status', '') == 'dead':
                set_status(ctx.summary, 'fail')
        if 'failure_reason' not in ctx.summary:
            ctx.summary['failure_reason'] = str(e)
        log.exception('Saw exception from tasks.')

        if teuth_config.sentry_dsn:
            sentry_sdk.init(teuth_config.sentry_dsn)
            # Work on a copy so the redaction below does not touch ctx.config
            config = deepcopy(ctx.config)

            tags = {
                'owner': ctx.owner,
            }
            # No task name is known if the first task definition was invalid
            if taskname is not None:
                tags['task'] = taskname
            optional_tags = ('teuthology_branch', 'branch', 'suite',
                             'machine_type', 'os_type', 'os_version')
            for tag in optional_tags:
                if tag in config:
                    tags[tag] = config[tag]

            # Remove ssh keys from reported config
            if 'targets' in config:
                targets = config['targets']
                for host in targets.keys():
                    targets[host] = '<redacted>'

            job_id = ctx.config.get('job_id')
            archive_path = ctx.config.get('archive_path')
            extras = dict(config=config, )
            if job_id:
                extras['logs'] = get_http_log_path(archive_path, job_id)

            fingerprint = e.fingerprint() if hasattr(e,
                                                     'fingerprint') else None
            exc_id = sentry_sdk.capture_exception(
                error=e,
                tags=tags,
                extras=extras,
                fingerprint=fingerprint,
            )
            event_url = "{server}/?query={id}".format(
                server=teuth_config.sentry_server.strip('/'), id=exc_id)
            log.exception(" Sentry event: %s" % event_url)
            ctx.summary['sentry_event'] = event_url

        if ctx.config.get('interactive-on-error'):
            # Cleared so the cleanup phase below does not start a second
            # interactive session.
            ctx.config['interactive-on-error'] = False
            from teuthology.task import interactive
            log.warning(
                'Saw failure during task execution, going into interactive mode...'
            )
            interactive.task(ctx=ctx, config=None)
        # Throughout teuthology, (x,) = y has been used to assign values
        # from yaml files where only one entry of type y is correct.  This
        # causes failures with 'too many values to unpack.'  We want to
        # fail as before, but with easier to understand error indicators.
        if isinstance(e, ValueError):
            # Python 3 appends ' (expected N)' to this message, so match on
            # the prefix rather than the whole string.
            if str(e).startswith('too many values to unpack'):
                emsg = 'Possible configuration error in yaml file'
                log.error(emsg)
                ctx.summary['failure_info'] = emsg
    finally:
        try:
            # NOTE(review): on Python 3 an exception already handled by the
            # except clause above is no longer reported by sys.exc_info()
            # here, so managers would see (None, None, None) in that case --
            # confirm this is the intended behaviour.
            exc_info = sys.exc_info()
            sleep_before_teardown = ctx.config.get('sleep_before_teardown')
            if sleep_before_teardown:
                log.info('Sleeping for {} seconds before unwinding because'
                         ' --sleep-before-teardown was given...'.format(
                             sleep_before_teardown))
                notify_sleep_before_teardown(ctx, stack, sleep_before_teardown)
                time.sleep(sleep_before_teardown)
            while stack:
                taskname, manager = stack.pop()
                log.debug('Unwinding manager %s', taskname)
                timer.mark('%s exit' % taskname)
                try:
                    suppress = manager.__exit__(*exc_info)
                except Exception as e:
                    if isinstance(e, ConnectionLostError):
                        # Prevent connection issues being flagged as failures
                        set_status(ctx.summary, 'dead')
                    else:
                        set_status(ctx.summary, 'fail')
                    if 'failure_reason' not in ctx.summary:
                        ctx.summary['failure_reason'] = str(e)
                    log.exception('Manager failed: %s', taskname)

                    if exc_info == (None, None, None):
                        # if first failure is in an __exit__, we don't
                        # have exc_info set yet
                        exc_info = sys.exc_info()

                    if ctx.config.get('interactive-on-error'):
                        from teuthology.task import interactive
                        log.warning(
                            'Saw failure during task cleanup, going into interactive mode...'
                        )
                        interactive.task(ctx=ctx, config=None)
                else:
                    # A truthy __exit__ result means the manager swallowed
                    # the exception; later managers see a clean state.
                    if suppress:
                        exc_info = (None, None, None)

            if exc_info != (None, None, None):
                log.debug('Exception was not quenched, exiting: %s: %s',
                          exc_info[0].__name__, exc_info[1])
                raise SystemExit(1)
        finally:
            # be careful about cyclic references
            del exc_info
        timer.mark("tasks complete")