Example 1
def report_outcome(config, archive, summary, fake_ctx):
    """ Reports on the final outcome of the command. """
    status = get_status(summary)
    passed = status == 'pass'

    if not passed and bool(config.get('nuke-on-error')):
        # only unlock if we locked them in the first place
        nuke(fake_ctx, fake_ctx.lock)

    if archive is not None:
        with open(os.path.join(archive, 'summary.yaml'), 'w') as f:
            yaml.safe_dump(summary, f, default_flow_style=False)

    with contextlib.closing(StringIO.StringIO()) as f:
        yaml.safe_dump(summary, f)
        log.info('Summary data:\n%s' % f.getvalue())

    with contextlib.closing(StringIO.StringIO()) as f:
        if 'email-on-error' in config and not passed:
            yaml.safe_dump(summary, f)
            yaml.safe_dump(config, f)
            emsg = f.getvalue()
            subject = "Teuthology error -- %s" % summary['failure_reason']
            email_results(subject, "Teuthology", config['email-on-error'],
                          emsg)

    report.try_push_job_info(config, summary)

    if passed:
        log.info(status)
    else:
        log.info(str(status).upper())
        sys.exit(1)
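
The get_status helper used above is not shown on this page; here is a minimal sketch of one plausible implementation, assuming the summary dict carries either an explicit 'status' string or a boolean 'success' flag (both keys appear in the other examples here):

def get_status(summary):
    # Hedged sketch, not the verified teuthology implementation:
    # prefer an explicit 'status', else map 'success' to 'pass'/'fail'.
    status = summary.get('status')
    if status is not None:
        return status
    success = summary.get('success')
    if success is True:
        return 'pass'
    if success is False:
        return 'fail'
    return None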
Example 2
def unlock_targets(job_config):
    serializer = report.ResultsSerializer(teuth_config.archive_base)
    job_info = serializer.job_info(job_config['name'], job_config['job_id'])
    machine_statuses = query.get_statuses(job_info['targets'].keys())
    # only unlock/nuke targets if locked and description matches
    locked = []
    for status in machine_statuses:
        name = shortname(status['name'])
        description = status['description']
        if not status['locked']:
            continue
        if description != job_info['archive_path']:
            log.warning(
                "Was going to unlock %s but it was locked by another job: %s",
                name, description)
            continue
        locked.append(name)
    if not locked:
        return
    job_status = get_status(job_info)
    if job_status == 'pass' or \
            (job_config.get('unlock_on_failure', False) and not job_config.get('nuke-on-error', False)):
        log.info('Unlocking machines...')
        fake_ctx = create_fake_context(job_config)
        for machine in locked:
            teuthology.lock.ops.unlock_one(fake_ctx, machine,
                                           job_info['owner'],
                                           job_info['archive_path'])
    if job_status != 'pass' and job_config.get('nuke-on-error', False):
        log.info('Nuking machines...')
        fake_ctx = create_fake_context(job_config)
        nuke(fake_ctx, True)
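
shortname is likewise an external helper; a plausible sketch, assuming it reduces a fully qualified lock name such as 'ubuntu@smithi001.example.com' to the bare host name (the exact rule is an assumption):

def shortname(name):
    # Hypothetical: drop any 'user@' prefix, keep the first FQDN component.
    return name.split('@')[-1].split('.')[0]

assert shortname('ubuntu@smithi001.example.com') == 'smithi001'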
Example 3
def reimage(job_config):
    # Reimage the targets specified in job config
    # and update their keys in config after reimaging
    ctx = create_fake_context(job_config)
    # change the status during the reimaging process
    report.try_push_job_info(ctx.config, dict(status='waiting'))
    targets = job_config['targets']
    try:
        reimaged = reimage_machines(ctx, targets, job_config['machine_type'])
    except Exception as e:
        log.exception('Reimaging error. Nuking machines...')
        # Reimage failures should map to the 'dead' status instead of 'fail'
        report.try_push_job_info(
            ctx.config,
            dict(status='dead',
                 failure_reason='Error reimaging machines: ' + str(e)))
        nuke(ctx, True)
        raise
    ctx.config['targets'] = reimaged
    # change the status to running after the reimaging process
    report.try_push_job_info(ctx.config, dict(status='running'))
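
A note on shapes, inferred from Example 10's job config and from the assignment into ctx.config['targets'] above: targets maps each host to its SSH host key, and reimage_machines presumably returns the same mapping with keys refreshed by the reimage. Illustrative values only:

# Hypothetical key material, for orientation:
targets = {'user@host1': 'key1', 'user@host2': 'key2'}
reimaged = {'user@host1': 'newkey1', 'user@host2': 'newkey2'}  # assumed return shape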
Example 4
def nuke(targets, owner, log, teuth_config, should_unlock,
         synch_clocks=True, reboot_all=True):
    from teuthology.nuke import nuke
    from teuthology.lock import unlock
    ctx = argparse.Namespace(
        config=dict(targets=targets),
        owner=owner,
        synch_clocks=synch_clocks,
        reboot_all=reboot_all,
        teuthology_config=teuth_config,
        )
    try:
        nuke(ctx, log)
    except Exception:
        log.exception('Could not nuke all targets in %s', targets)
        # not re-raising so that parallel calls aren't killed
    else:
        if should_unlock:
            for target in targets.keys():
                unlock(ctx, target, owner)
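
A hypothetical call site for this wrapper, with the targets dict shaped like the job configs elsewhere on this page (values are placeholders):

import logging

log = logging.getLogger(__name__)
nuke(
    targets={'user@host1': 'key1', 'user@host2': 'key2'},
    owner='test_owner',
    log=log,
    teuth_config={},
    should_unlock=True,
)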
Example 5
def unlock_targets(job_config):
    serializer = report.ResultsSerializer(teuth_config.archive_base)
    job_info = serializer.job_info(job_config['name'], job_config['job_id'])
    machine_status = query.get_statuses(job_info['targets'].keys())
    # only unlock/nuke targets if locked in the first place
    locked = [shortname(_['name']) for _ in machine_status if _['locked']]
    if not locked:
        return
    job_status = get_status(job_info)
    if job_status == 'pass' or \
            (job_config.get('unlock_on_failure', False) and not job_config.get('nuke-on-error', False)):
        log.info('Unlocking machines...')
        fake_ctx = create_fake_context(job_config)
        for machine in locked:
            teuthology.lock.ops.unlock_one(fake_ctx, machine,
                                           job_info['owner'],
                                           job_info['archive_path'])
    if job_status != 'pass' and job_config.get('nuke-on-error', False):
        log.info('Nuking machines...')
        fake_ctx = create_fake_context(job_config)
        nuke(fake_ctx, True)
Example 6
def main():
    from gevent import monkey; monkey.patch_all(dns=False)
    from .orchestra import monkey; monkey.patch_all()

    import logging

    log = logging.getLogger(__name__)
    ctx = parse_args()

    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG

    logging.basicConfig(
        level=loglevel,
        )

    if 'targets' in ctx.config and 'roles' in ctx.config:
        targets = len(ctx.config['targets'])
        roles = len(ctx.config['roles'])
        assert targets >= roles, \
            '%d targets are needed for all roles but found %d listed.' % (roles, targets)

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()
    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    if ctx.archive is not None:
        os.mkdir(ctx.archive)

        handler = logging.FileHandler(
            filename=os.path.join(ctx.archive, 'teuthology.log'),
            )
        formatter = logging.Formatter(
            fmt='%(asctime)s.%(msecs)03d %(levelname)s:%(name)s:%(message)s',
            datefmt='%Y-%m-%dT%H:%M:%S',
            )
        handler.setFormatter(formatter)
        logging.getLogger().addHandler(handler)

        with open(os.path.join(ctx.archive, 'pid'), 'w') as f:
            f.write('%d' % os.getpid())

        with open(os.path.join(ctx.archive, 'owner'), 'w') as f:
            f.write(ctx.owner + '\n')

        with open(os.path.join(ctx.archive, 'orig.config.yaml'), 'w') as f:
            yaml.safe_dump(ctx.config, f, default_flow_style=False)

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({'internal.lock_machines': len(ctx.config['roles'])})

    init_tasks.extend([
            {'internal.save_config': None},
            {'internal.check_lock': None},
            {'internal.connect': None},
            {'internal.check_conflict': None},
            ])
    if 'kernel' in ctx.config:
        init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
            {'internal.base': None},
            {'internal.archive': None},
            {'internal.coredump': None},
            {'internal.syslog': None},
            {'internal.timer': None},
            ])

    ctx.config['tasks'][:0] = init_tasks

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.nuke import nuke
            # only unlock if we locked them in the first place
            nuke(ctx, log, ctx.lock)
        if ctx.archive is not None:
            with open(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)

    if not ctx.summary.get('success', True):
        import sys
        sys.exit(1)
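
The slice assignment ctx.config['tasks'][:0] = init_tasks prepends the internal tasks in place rather than rebinding the list, so any other reference to the same list sees the change; a standalone illustration:

tasks = [{'ceph': None}]
init_tasks = [{'internal.lock_machines': 3}, {'internal.connect': None}]
tasks[:0] = init_tasks  # in-place prepend
assert tasks == [
    {'internal.lock_machines': 3},
    {'internal.connect': None},
    {'ceph': None},
]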
Example 7
def main(args):
    # run dispatcher in job supervisor mode if --supervisor passed
    if args["--supervisor"]:
        return supervisor.main(args)

    verbose = args["--verbose"]
    tube = args["--tube"]
    log_dir = args["--log-dir"]
    archive_dir = args["--archive-dir"]

    if archive_dir is None:
        archive_dir = teuth_config.archive_base

    # set up logging for the dispatcher in {log_dir}
    loglevel = logging.INFO
    if verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)
    log_file_path = os.path.join(log_dir, f"dispatcher.{tube}.{os.getpid()}")
    setup_log_file(log_file_path)
    install_except_hook()

    load_config(archive_dir=archive_dir)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, tube)
    result_proc = None

    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()

        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        job_id = job.jid
        log.info('Reserved job %d', job_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(job_id)

        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config,
                log_file_path,
                archive_dir,
            )
        except SkipJob:
            continue

        # lock machines but do not reimage them
        if 'roles' in job_config:
            job_config = lock_machines(job_config)

        run_args = [
            os.path.join(teuth_bin_path, 'teuthology-dispatcher'),
            '--supervisor',
            '-v',
            '--bin-path',
            teuth_bin_path,
            '--archive-dir',
            archive_dir,
        ]

        # Create run archive directory if not already created and
        # job's archive directory
        create_job_archive(job_config['name'], job_config['archive_path'],
                           archive_dir)
        job_config_path = os.path.join(job_config['archive_path'],
                                       'orig.config.yaml')

        # Write initial job config in job archive dir
        with open(job_config_path, 'w') as f:
            yaml.safe_dump(job_config, f, default_flow_style=False)

        run_args.extend(["--job-config", job_config_path])

        try:
            job_proc = subprocess.Popen(run_args)
            log.info('Job supervisor PID: %s', job_proc.pid)
        except Exception:
            error_message = "Saw error while trying to spawn supervisor."
            log.exception(error_message)
            if 'targets' in job_config:
                nuke(supervisor.create_fake_context(job_config), True)
            report.try_push_job_info(
                job_config, dict(status='fail', failure_reason=error_message))

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example 8
def main():
    from gevent import monkey
    monkey.patch_all(dns=False)
    from .orchestra import monkey
    monkey.patch_all()
    import logging

    ctx = parse_args()
    set_up_logging(ctx)
    log = logging.getLogger(__name__)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    write_initial_metadata(ctx)

    if 'targets' in ctx.config and 'roles' in ctx.config:
        targets = len(ctx.config['targets'])
        roles = len(ctx.config['roles'])
        assert targets >= roles, \
            '%d targets are needed for all roles but found %d listed.' % (roles, targets)

    machine_type = ctx.machine_type
    if machine_type is None:
        fallback_default = ctx.config.get('machine_type', 'plana')
        machine_type = ctx.config.get('machine-type', fallback_default)

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    log.debug('\n  '.join([
        'Config:',
    ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({
            'internal.lock_machines': (len(ctx.config['roles']), machine_type)
        })

    init_tasks.extend([
        {'internal.save_config': None},
        {'internal.check_lock': None},
        {'internal.connect': None},
        {'internal.check_conflict': None},
        {'internal.check_ceph_data': None},
        {'internal.vm_setup': None},
    ])
    if 'kernel' in ctx.config:
        from teuthology.misc import get_distro
        distro = get_distro(ctx)
        if distro == 'ubuntu':
            init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
        {'internal.base': None},
        {'internal.archive': None},
        {'internal.coredump': None},
        {'internal.sudo': None},
        {'internal.syslog': None},
        {'internal.timer': None},
    ])

    ctx.config['tasks'][:0] = init_tasks

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.nuke import nuke
            # only unlock if we locked them in the first place
            nuke(ctx, log, ctx.lock)
        if ctx.archive is not None:
            with open(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)
        with contextlib.closing(StringIO.StringIO()) as f:
            yaml.safe_dump(ctx.summary, f)
            log.info('Summary data:\n%s' % f.getvalue())
        with contextlib.closing(StringIO.StringIO()) as f:
            if 'email-on-error' in ctx.config and not ctx.summary.get(
                    'success', False):
                yaml.safe_dump(ctx.summary, f)
                yaml.safe_dump(ctx.config, f)
                emsg = f.getvalue()
                subject = "Teuthology error -- %s" % ctx.summary[
                    'failure_reason']
                from teuthology.suite import email_results
                email_results(subject, "Teuthology",
                              ctx.config['email-on-error'], emsg)
        if ctx.summary.get('success', True):
            log.info('pass')
        else:
            log.info('FAIL')
            import sys
            sys.exit(1)
Example 9
def main():
    from gevent import monkey
    monkey.patch_all(dns=False)
    from .orchestra import monkey
    monkey.patch_all()
    import logging

    ctx = parse_args()
    set_up_logging(ctx)
    log = logging.getLogger(__name__)

    if ctx.owner is None:
        from teuthology.misc import get_user
        ctx.owner = get_user()

    write_initial_metadata(ctx)

    if 'targets' in ctx.config and 'roles' in ctx.config:
        targets = len(ctx.config['targets'])
        roles = len(ctx.config['roles'])
        assert targets >= roles, \
            '%d targets are needed for all roles but found %d listed.' % (roles, targets)

    machine_type = ctx.machine_type
    if machine_type is None:
        fallback_default = ctx.config.get('machine_type', 'plana')
        machine_type = ctx.config.get('machine-type', fallback_default)

    if ctx.block:
        assert ctx.lock, \
            'the --block option is only supported with the --lock option'

    from teuthology.misc import read_config
    read_config(ctx)

    log.debug('\n  '.join(['Config:', ] + yaml.safe_dump(ctx.config, default_flow_style=False).splitlines()))

    ctx.summary = dict(success=True)

    ctx.summary['owner'] = ctx.owner

    if ctx.description is not None:
        ctx.summary['description'] = ctx.description

    for task in ctx.config['tasks']:
        assert 'kernel' not in task, \
            'kernel installation should be a base-level item, not part of the tasks list'

    init_tasks = []
    if ctx.lock:
        assert 'targets' not in ctx.config, \
            'You cannot specify targets in a config file when using the --lock option'
        init_tasks.append({'internal.lock_machines': (len(ctx.config['roles']), machine_type)})

    init_tasks.extend([
            {'internal.save_config': None},
            {'internal.check_lock': None},
            {'internal.connect': None},
            {'internal.check_conflict': None},
            {'internal.check_ceph_data': None},
            {'internal.vm_setup': None},
            ])
    if 'kernel' in ctx.config:
        from teuthology.misc import get_distro
        distro = get_distro(ctx)
        if distro == 'ubuntu':
            init_tasks.append({'kernel': ctx.config['kernel']})
    init_tasks.extend([
            {'internal.base': None},
            {'internal.archive': None},
            {'internal.coredump': None},
            {'internal.sudo': None},
            {'internal.syslog': None},
            {'internal.timer': None},
            ])

    ctx.config['tasks'][:0] = init_tasks

    from teuthology.run_tasks import run_tasks
    try:
        run_tasks(tasks=ctx.config['tasks'], ctx=ctx)
    finally:
        if not ctx.summary.get('success') and ctx.config.get('nuke-on-error'):
            from teuthology.nuke import nuke
            # only unlock if we locked them in the first place
            nuke(ctx, log, ctx.lock)
        if ctx.archive is not None:
            with open(os.path.join(ctx.archive, 'summary.yaml'), 'w') as f:
                yaml.safe_dump(ctx.summary, f, default_flow_style=False)
        with contextlib.closing(StringIO.StringIO()) as f:
            yaml.safe_dump(ctx.summary, f)
            log.info('Summary data:\n%s' % f.getvalue())
        with contextlib.closing(StringIO.StringIO()) as f:
            if 'email-on-error' in ctx.config and not ctx.summary.get('success', False):
                yaml.safe_dump(ctx.summary, f)
                yaml.safe_dump(ctx.config, f)
                emsg = f.getvalue()
                subject = "Teuthology error -- %s" % ctx.summary['failure_reason']
                from teuthology.suite import email_results
                email_results(subject,"Teuthology",ctx.config['email-on-error'],emsg)
        if ctx.summary.get('success', True):
            log.info('pass')
        else:
            log.info('FAIL')
            import sys
            sys.exit(1)
Example 10
def test_nuke_internal():
    job_config = dict(
        owner='test_owner',
        targets={
            'user@host1': 'key1',
            'user@host2': 'key2'
        },
        archive_path='/path/to/test_run',
        machine_type='test_machine',
        os_type='centos',
        os_version='8.3',
        name='test_name',
    )
    locks = [{
        'name': target,
        'description': job_config['name']
    } for target in job_config['targets'].keys()]
    ctx = create_fake_context(job_config)

    # minimal call using defaults
    with patch.multiple(
            nuke,
            nuke_helper=DEFAULT,
            list_locks=lambda: locks,
            unlock_one=DEFAULT,
    ) as m:
        nuke.nuke(ctx, True)
        m['nuke_helper'].assert_called_with(ANY, True, False, True)
        m['unlock_one'].assert_called()

    # don't unlock
    with patch.multiple(
            nuke,
            nuke_helper=DEFAULT,
            list_locks=lambda: locks,
            unlock_one=DEFAULT,
    ) as m:
        nuke.nuke(ctx, False)
        m['nuke_helper'].assert_called_with(ANY, False, False, True)
        m['unlock_one'].assert_not_called()

    # mimicking what teuthology-dispatcher --supervisor does
    with patch.multiple(
            nuke,
            nuke_helper=DEFAULT,
            list_locks=lambda: locks,
            unlock_one=DEFAULT,
    ) as m:
        nuke.nuke(ctx, False, True, False, True, False)
        m['nuke_helper'].assert_called_with(ANY, False, True, False)
        m['unlock_one'].assert_not_called()

    # no targets
    del ctx.config['targets']
    with patch.multiple(
            nuke,
            nuke_helper=DEFAULT,
            unlock_one=DEFAULT,
    ) as m:
        nuke.nuke(ctx, True)
        m['nuke_helper'].assert_not_called()
        m['unlock_one'].assert_not_called()
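
For readers unfamiliar with patch.multiple: passing DEFAULT replaces each named attribute with a MagicMock and yields the mocks as a dict, which is what the m['nuke_helper'] lookups above rely on. A self-contained illustration:

from types import SimpleNamespace
from unittest.mock import DEFAULT, patch

mod = SimpleNamespace(helper=lambda *args: 'real')
with patch.multiple(mod, helper=DEFAULT) as m:
    mod.helper(1, 2)                      # calls the injected MagicMock
    m['helper'].assert_called_with(1, 2)  # the mock recorded the call
assert mod.helper() == 'real'             # original restored on exit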