Example #1
0
def main(args):
    """Run a single job in supervisor mode.

    ``args`` is a mapping keyed by command-line option name; this function
    reads ``--verbose``, ``--archive-dir``, ``--bin-path`` and
    ``--job-config``.

    The job config is loaded from the YAML file named by ``--job-config``.
    Logging goes to ``supervisor.<job_id>.log`` under the config's
    ``archive_path``. If the config has a ``targets`` key, ``reimage`` is
    called and the config is written back to the same file before the job
    is handed to ``run_job``. A ``SkipJob`` raised by ``run_job`` is
    swallowed. Always returns None.
    """
    verbose = args["--verbose"]
    archive_dir = args["--archive-dir"]
    teuth_bin_path = args["--bin-path"]
    config_path = args["--job-config"]

    with open(config_path, 'r') as stream:
        job_config = yaml.safe_load(stream)

    log.setLevel(logging.DEBUG if verbose else logging.INFO)

    # Look up archive_path before job_id so a missing key surfaces in the
    # same order as a single combined expression would.
    archive_path = job_config['archive_path']
    log_name = f"supervisor.{job_config['job_id']}.log"
    setup_log_file(os.path.join(archive_path, log_name))
    install_except_hook()

    # Reimage target machines before running the job, then persist the
    # config to disk again (reimage receives the dict itself).
    needs_reimage = 'targets' in job_config
    if needs_reimage:
        reimage(job_config)
        with open(config_path, 'w') as out:
            yaml.safe_dump(job_config, out, default_flow_style=False)

    try:
        run_job(job_config, teuth_bin_path, archive_dir, verbose)
    except SkipJob:
        # Nothing further to do for a skipped job.
        pass
Example #2
0
def set_up_logging(verbose, archive):
    """Prepare logging for a run.

    :param verbose: when truthy, the teuthology logger is set to DEBUG.
    :param archive: directory to create for this run, or None. When given,
                    it is created with ``os.mkdir`` and a ``teuthology.log``
                    file inside it is registered through
                    ``teuthology.setup_log_file``.

    The exception hook is installed regardless of the arguments.
    """
    if verbose:
        teuthology.log.setLevel(logging.DEBUG)

    has_archive = archive is not None
    if has_archive:
        # os.mkdir raises if the directory already exists.
        os.mkdir(archive)
        log_file = os.path.join(archive, 'teuthology.log')
        teuthology.setup_log_file(log_file)

    install_except_hook()
Example #3
0
def set_up_logging(verbose, archive):
    """Configure the teuthology logger and, optionally, an archive log file.

    With a truthy ``verbose`` the teuthology logger level becomes DEBUG.
    With a non-None ``archive`` the directory is created (``os.mkdir``) and
    ``<archive>/teuthology.log`` is passed to ``teuthology.setup_log_file``.
    ``install_except_hook`` is always called last.
    """
    if verbose:
        teuthology.log.setLevel(logging.DEBUG)

    if archive is not None:
        os.mkdir(archive)
        archive_log = os.path.join(archive, 'teuthology.log')
        teuthology.setup_log_file(archive_log)

    install_except_hook()
Example #4
0
def set_up_logging(ctx):
    """Set up logging from a context object.

    Reads ``ctx.verbose`` and ``ctx.archive``. A truthy ``verbose`` switches
    the teuthology logger to DEBUG. A non-None ``archive`` is created with
    ``os.mkdir`` and ``<archive>/teuthology.log`` is attached to the root
    logger via ``teuthology.setup_log_file``. The exception hook is
    installed in every case.
    """
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    archive = ctx.archive
    if archive is not None:
        os.mkdir(archive)
        root_logger = logging.getLogger()
        log_file = os.path.join(archive, 'teuthology.log')
        teuthology.setup_log_file(root_logger, log_file)

    install_except_hook()
Example #5
0
def set_up_logging(ctx):
    """Configure logging according to ``ctx.verbose`` and ``ctx.archive``.

    * ``ctx.verbose`` truthy: teuthology logger level is set to DEBUG.
    * ``ctx.archive`` not None: the directory is created with ``os.mkdir``
      and the root logger gets ``<archive>/teuthology.log`` through
      ``teuthology.setup_log_file``.

    ``install_except_hook`` runs unconditionally at the end.
    """
    if ctx.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    archive_dir = ctx.archive
    wants_log_file = archive_dir is not None
    if wants_log_file:
        os.mkdir(archive_dir)
        target_logger = logging.getLogger()
        teuthology.setup_log_file(
            target_logger,
            os.path.join(archive_dir, 'teuthology.log'),
        )

    install_except_hook()
Example #6
0
def main(args):
    """Entry point for coverage generation.

    Reads ``args.verbose`` and ``args.test_dir``. Sets the teuthology logger
    to DEBUG when verbose, calls ``read_config(args)``, attaches
    ``<test_dir>/coverage.log`` to this module's logger and then runs
    ``analyze(args)``. Any exception from ``analyze`` is logged with its
    traceback and re-raised.
    """
    if args.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    logger = logging.getLogger(__name__)

    read_config(args)

    coverage_log = os.path.join(args.test_dir, "coverage.log")
    teuthology.setup_log_file(logger, coverage_log)

    try:
        analyze(args)
    except Exception:
        # Record the failure in the log file before propagating it.
        logger.exception("error generating coverage")
        raise
Example #7
0
def main(args):
    """Entry point for coverage generation from an option mapping.

    ``args`` is a mapping keyed by option name. ``<test_dir>`` selects the
    directory that receives ``coverage.log``; the remaining options
    (``--cov-tools-dir``, ``--lcov-output``, ``--html-output``,
    ``--skip-init``) are forwarded positionally to ``analyze``. Failures
    while building the call or running it are logged and re-raised.
    """
    if args["--verbose"]:
        teuthology.log.setLevel(logging.DEBUG)

    logger = logging.getLogger(__name__)

    test_dir = args['<test_dir>']
    teuthology.setup_log_file(os.path.join(test_dir, 'coverage.log'))

    try:
        # The option lookups stay inside the try block so that a missing
        # key is logged like any other failure.
        analyze(
            test_dir,
            args['--cov-tools-dir'],
            args['--lcov-output'],
            args['--html-output'],
            args['--skip-init'],
        )
    except Exception:
        logger.exception('error generating coverage')
        raise
Example #8
0
def main(args):
    """Entry point for results generation from an option mapping.

    ``args`` is a mapping keyed by option name. Unless ``--dry-run`` is
    truthy, ``results.log`` inside ``--archive-dir`` is registered as the
    log file. ``results`` is then called with the archive dir, name, email,
    integer timeout and dry-run flag. Any exception raised while preparing
    or running that call is logged with traceback and re-raised.
    """
    logger = logging.getLogger(__name__)
    if args['--verbose']:
        teuthology.log.setLevel(logging.DEBUG)

    dry_run = args['--dry-run']
    if not dry_run:
        results_log = os.path.join(args['--archive-dir'], 'results.log')
        teuthology.setup_log_file(results_log)

    try:
        # Lookups and the int() conversion happen inside the try block so
        # that bad or missing options are logged too.
        archive_dir = args['--archive-dir']
        name = args['--name']
        email = args['--email']
        timeout = int(args['--timeout'])
        results(archive_dir, name, email, timeout, dry_run)
    except Exception:
        logger.exception('error generating results')
        raise
Example #9
0
def main(args):
    """Generate coverage for ``args.test_dir``.

    Steps, in order: optionally raise the teuthology logger to DEBUG
    (``args.verbose``), call ``read_config(args)``, attach
    ``<test_dir>/coverage.log`` to this module's logger, run
    ``analyze(args)``. An exception from ``analyze`` is logged via
    ``Logger.exception`` and then re-raised unchanged.
    """
    verbose = args.verbose
    if verbose:
        teuthology.log.setLevel(logging.DEBUG)

    module_log = logging.getLogger(__name__)

    read_config(args)

    teuthology.setup_log_file(
        module_log,
        os.path.join(args.test_dir, 'coverage.log'),
    )

    try:
        analyze(args)
    except Exception:
        module_log.exception('error generating coverage')
        raise
Example #10
0
def main(args):
    """Entry point for results generation.

    Reads ``args.verbose`` and ``args.archive_dir``. After
    ``misc.read_config(args)`` the file ``<archive_dir>/results.log`` is
    attached to this module's logger, then ``results(args)`` runs. Any
    exception from ``results`` is logged with traceback and re-raised.
    """
    logger = logging.getLogger(__name__)
    if args.verbose:
        teuthology.log.setLevel(logging.DEBUG)

    misc.read_config(args)

    results_log = os.path.join(args.archive_dir, 'results.log')
    teuthology.setup_log_file(logger, results_log)

    try:
        results(args)
    except Exception:
        # Make sure the failure lands in results.log before propagating.
        logger.exception('error generating results')
        raise
Example #11
0
def main(args):
    """Run results generation using options from the ``args`` mapping.

    Options read: ``--verbose``, ``--dry-run``, ``--archive-dir``,
    ``--name``, ``--email`` and ``--timeout`` (converted with ``int``).
    A log file ``results.log`` in the archive dir is set up only when
    ``--dry-run`` is falsy. Errors raised while calling ``results`` are
    logged and re-raised.
    """
    module_log = logging.getLogger(__name__)
    if args['--verbose']:
        teuthology.log.setLevel(logging.DEBUG)

    is_dry_run = args['--dry-run']
    if not is_dry_run:
        teuthology.setup_log_file(
            os.path.join(args['--archive-dir'], 'results.log'))

    try:
        # Arguments are evaluated inside the try block, so a bad
        # --timeout value is logged along with any failure in results().
        results(
            args['--archive-dir'],
            args['--name'],
            args['--email'],
            int(args['--timeout']),
            is_dry_run,
        )
    except Exception:
        module_log.exception('error generating results')
        raise
Example #12
0
def main(args):
    """Run coverage analysis using options from the ``args`` mapping.

    ``<test_dir>`` is both the first argument to ``analyze`` and the
    directory that receives ``coverage.log``. ``--cov-tools-dir``,
    ``--lcov-output``, ``--html-output`` and ``--skip-init`` are passed on
    positionally in that order. Exceptions are logged and re-raised.
    """
    if args["--verbose"]:
        teuthology.log.setLevel(logging.DEBUG)

    module_log = logging.getLogger(__name__)

    test_dir = args['<test_dir>']
    coverage_log = os.path.join(test_dir, 'coverage.log')
    teuthology.setup_log_file(coverage_log)

    try:
        # Resolve the options inside the try block so lookup errors are
        # reported through the same logging path as analysis errors.
        cov_tools_dir = args['--cov-tools-dir']
        lcov_output = args['--lcov-output']
        html_output = args['--html-output']
        skip_init = args['--skip-init']
        analyze(test_dir, cov_tools_dir, lcov_output, html_output, skip_init)
    except Exception:
        module_log.exception('error generating coverage')
        raise
Example #13
0
def main(args):
    """Dispatcher entry point: reserve jobs from a beanstalk tube and hand
    each one to a separate supervisor process.

    ``args`` is a mapping keyed by command-line option name. When
    ``--supervisor`` is truthy the call is delegated to
    ``supervisor.main(args)`` and its result is returned. Otherwise logging
    is set up in ``--log-dir`` (file ``dispatcher.<tube>.<pid>``) and the
    function loops: reserve a job, bury it, prepare it with ``prep_job``,
    optionally lock machines, write the job config to
    ``<archive_path>/orig.config.yaml`` and spawn ``teuthology-dispatcher
    --supervisor`` on that file. The loop ends after handling a job whose
    config has a truthy ``stop_worker`` (``restart()``/``stop()`` are
    defined elsewhere and presumably can end the process as well).
    """
    # run dispatcher in job supervisor mode if --supervisor passed
    if args["--supervisor"]:
        return supervisor.main(args)

    verbose = args["--verbose"]
    tube = args["--tube"]
    log_dir = args["--log-dir"]
    archive_dir = args["--archive-dir"]

    # Fall back to the configured archive base when none was given.
    if archive_dir is None:
        archive_dir = teuth_config.archive_base

    # setup logging for dispatcher in {log_dir}
    loglevel = logging.INFO
    if verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)
    log_file_path = os.path.join(log_dir, f"dispatcher.{tube}.{os.getpid()}")
    setup_log_file(log_file_path)
    install_except_hook()

    load_config(archive_dir=archive_dir)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, tube)
    # NOTE(review): result_proc is never assigned anything but None in this
    # function, so the poll() check at the top of the loop cannot fire here.
    result_proc = None

    # Only fetch teuthology when no fixed path is configured; the qa suite
    # is fetched unconditionally.
    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        # sentinel/restart/stop are defined elsewhere; presumably they check
        # for marker files and restart or stop this process -- TODO confirm.
        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()

        # reserve() returns None when nothing arrived within the timeout.
        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        job_id = job.jid
        log.info('Reserved job %d', job_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(job_id)

        # A stop_worker job ends the loop, but only after this iteration
        # has finished handling it.
        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config,
                log_file_path,
                archive_dir,
            )
        except SkipJob:
            # The job stays buried; move on to the next one.
            continue

        # lock machines but do not reimage them
        if 'roles' in job_config:
            job_config = lock_machines(job_config)

        # Command line for the supervisor child; the job config path is
        # appended below once the file has been written.
        run_args = [
            os.path.join(teuth_bin_path, 'teuthology-dispatcher'),
            '--supervisor',
            '-v',
            '--bin-path',
            teuth_bin_path,
            '--archive-dir',
            archive_dir,
        ]

        # Create run archive directory if not already created and
        # job's archive directory
        create_job_archive(job_config['name'], job_config['archive_path'],
                           archive_dir)
        job_config_path = os.path.join(job_config['archive_path'],
                                       'orig.config.yaml')

        # Write initial job config in job archive dir
        with open(job_config_path, 'w') as f:
            yaml.safe_dump(job_config, f, default_flow_style=False)

        run_args.extend(["--job-config", job_config_path])

        try:
            # The child is not waited on here; only its PID is logged.
            job_proc = subprocess.Popen(run_args)
            log.info('Job supervisor PID: %s', job_proc.pid)
        except Exception:
            # Spawning failed: log it, nuke any targets already attached to
            # the job, and report the job as failed. Execution then falls
            # through to job.delete() below.
            error_message = "Saw error while trying to spawn supervisor."
            log.exception(error_message)
            if 'targets' in job_config:
                nuke(supervisor.create_fake_context(job_config), True)
            report.try_push_job_info(
                job_config, dict(status='fail', failure_reason=error_message))

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #14
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. The process exits via ``sys.exit`` when
    ``ctx.archive_dir`` is not an existing directory.

    Each reserved job is buried, prepared with ``prep_job`` and executed
    with ``run_job``; ``SkipJob`` from either moves on to the next job
    without deleting it. The loop ends after handling a job whose config
    has a truthy ``stop_worker``.
    """
    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)

    log_file_path = os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
        pid=os.getpid(), tube=ctx.tube,))
    setup_log_file(log_file_path)

    install_except_hook()

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))
    else:
        teuth_config.archive_base = ctx.archive_dir

    set_config_attr(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    # NOTE(review): result_proc is never assigned anything but None in this
    # function, so the poll() check below cannot fire here.
    result_proc = None

    # Only fetch teuthology when no fixed path is configured; the qa suite
    # is fetched unconditionally.
    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if need_restart():
            restart()

        # reserve() returns None when nothing arrived within the timeout.
        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        job_id = job.jid
        log.info('Reserved job %d', job_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(job_id)

        # A stop_worker job ends the loop after this iteration completes.
        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config,
                log_file_path,
                ctx.archive_dir,
            )
            run_job(
                job_config,
                teuth_bin_path,
                ctx.archive_dir,
                ctx.verbose,
            )
        except SkipJob:
            continue

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError. The job has already run, so a
        # failed delete is logged rather than allowed to kill the worker.
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #15
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. Each reserved job is buried, prepared with
    ``prep_job`` and run with ``run_job``; ``SkipJob`` from either skips to
    the next job without deleting it. The loop ends after handling a job
    whose config has a truthy ``stop_worker``.
    """
    log.setLevel(logging.DEBUG if ctx.verbose else logging.INFO)

    log_dir = ctx.log_dir
    worker_log_name = 'worker.{tube}.{pid}'.format(
        pid=os.getpid(),
        tube=ctx.tube,
    )
    log_file_path = os.path.join(log_dir, worker_log_name)
    setup_log_file(log_file_path)
    install_except_hook()

    load_config(ctx=ctx)
    set_config_attr(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    result_proc = None

    # Only fetch teuthology when no fixed path is configured; the qa suite
    # is fetched unconditionally.
    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # If a teuthology-results process is still tracked, collect its
        # return code once it has finished so that it can exit.
        if result_proc is not None:
            if result_proc.poll() is not None:
                log.debug("teuthology-results exited with code: %s",
                          result_proc.returncode)
                result_proc = None

        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()

        job = connection.reserve(timeout=60)
        if job is None:
            # Nothing arrived within the timeout; go around again.
            continue

        # Bury first so the job is not re-run if handling it fails.
        job.bury()
        jid = job.jid
        log.info('Reserved job %d', jid)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(jid)

        # A stop_worker job ends the loop after this iteration completes.
        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config, log_file_path, ctx.archive_dir)
            run_job(
                job_config, teuth_bin_path, ctx.archive_dir, ctx.verbose)
        except SkipJob:
            continue
        else:
            # Guarded so that the worker does not die when beanstalkc
            # throws a SocketError.
            try:
                job.delete()
            except Exception:
                log.exception("Saw exception while trying to delete job")
Example #16
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. The process exits via ``sys.exit`` when
    ``ctx.archive_dir`` is not an existing directory.

    For every reserved job the teuthology branch named in the job config
    (default ``master``) is fetched into ``$HOME/teuthology-<branch>``.
    Jobs flagged ``last_in_suite`` run ``teuthology-results`` and wait for
    it; all other jobs are executed through ``run_job``. The loop never
    terminates on its own.

    :raises RuntimeError: when the fetched branch has no ``virtualenv/bin``
                          directory.
    """
    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)

    log_file_path = os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
        pid=os.getpid(), tube=ctx.tube,))
    setup_log_file(log, log_file_path)

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))
    else:
        teuth_config.archive_base = ctx.archive_dir

    read_config(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)

    while True:
        if need_restart():
            restart()

        # reserve() returns None when nothing arrived within the timeout.
        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        log.info('Reserved job %d', job.jid)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)

        # Augment the queued config with per-job bookkeeping values.
        job_config['job_id'] = str(job.jid)
        safe_archive = safepath.munge(job_config['name'])
        job_config['worker_log'] = log_file_path
        archive_path_full = os.path.join(
            ctx.archive_dir, safe_archive, str(job.jid))
        job_config['archive_path'] = archive_path_full

        # If the teuthology branch was not specified, default to master and
        # store that value.
        teuthology_branch = job_config.get('teuthology_branch', 'master')
        job_config['teuthology_branch'] = teuthology_branch

        teuth_path = os.path.join(os.getenv("HOME"),
                                  'teuthology-' + teuthology_branch)

        fetch_teuthology_branch(path=teuth_path, branch=teuthology_branch)

        teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
        if not os.path.isdir(teuth_bin_path):
            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                               (teuthology_branch, teuth_bin_path))

        if job_config.get('last_in_suite'):
            log.info('Generating coverage for %s', job_config['name'])
            if teuth_config.results_server:
                report.try_delete_jobs(job_config['name'],
                                       job_config['job_id'])
            args = [
                os.path.join(teuth_bin_path, 'teuthology-results'),
                '--timeout',
                str(job_config.get('results_timeout', 21600)),
                '--email',
                job_config['email'],
                '--archive-dir',
                os.path.join(ctx.archive_dir, safe_archive),
                '--name',
                job_config['name'],
            ]
            # Blocks until teuthology-results has finished.
            subprocess.Popen(args=args).wait()
        else:
            log.info('Creating archive dir %s', archive_path_full)
            safepath.makedirs(ctx.archive_dir, safe_archive)
            log.info('Running job %d', job.jid)
            run_job(job_config, teuth_bin_path)

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError. The job has already been
        # handled, so a failed delete is logged instead of being fatal.
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #17
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. The process exits via ``sys.exit`` when
    ``ctx.archive_dir`` is not an existing directory.

    For every reserved job the requested teuthology branch and qa suite
    are fetched. A ``BranchNotFoundError`` causes the job to be removed
    through ``report.try_delete_jobs`` and skipped. Jobs flagged
    ``last_in_suite`` start ``teuthology-results`` in the background; all
    other jobs are executed through ``run_job``. The loop never terminates
    on its own.

    :raises RuntimeError: when the fetched branch has no ``virtualenv/bin``
                          directory.
    """
    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)

    log_file_path = os.path.join(
        ctx.log_dir, 'worker.{tube}.{pid}'.format(
            pid=os.getpid(),
            tube=ctx.tube,
        ))
    setup_log_file(log_file_path)

    install_except_hook()

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))
    else:
        teuth_config.archive_base = ctx.archive_dir

    read_config(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    # Handle of the most recently started teuthology-results child, if any.
    result_proc = None

    fetch_teuthology('master')
    fetch_qa_suite('master')

    while True:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if need_restart():
            restart()

        # reserve() returns None when nothing arrived within the timeout.
        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        log.info('Reserved job %d', job.jid)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)

        # Augment the queued config with per-job bookkeeping values.
        job_config['job_id'] = str(job.jid)
        safe_archive = safepath.munge(job_config['name'])
        job_config['worker_log'] = log_file_path
        archive_path_full = os.path.join(ctx.archive_dir, safe_archive,
                                         str(job.jid))
        job_config['archive_path'] = archive_path_full

        # If the teuthology branch was not specified, default to master and
        # store that value.
        teuthology_branch = job_config.get('teuthology_branch', 'master')
        job_config['teuthology_branch'] = teuthology_branch

        try:
            teuth_path = fetch_teuthology(branch=teuthology_branch)
            # For the teuthology tasks, we look for suite_branch, and if we
            # don't get that, we look for branch, and fall back to 'master'.
            # last-in-suite jobs don't have suite_branch or branch set.
            ceph_branch = job_config.get('branch', 'master')
            suite_branch = job_config.get('suite_branch', ceph_branch)
            job_config['suite_path'] = fetch_qa_suite(suite_branch)
        except BranchNotFoundError:
            log.exception("Branch not found; throwing job away")
            # Optionally, we could mark the job as dead, but we don't have a
            # great way to express why it is dead.
            report.try_delete_jobs(job_config['name'], job_config['job_id'])
            continue

        teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
        if not os.path.isdir(teuth_bin_path):
            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                               (teuthology_branch, teuth_bin_path))

        if job_config.get('last_in_suite'):
            if teuth_config.results_server:
                report.try_delete_jobs(job_config['name'],
                                       job_config['job_id'])
            log.info('Generating results email for %s', job_config['name'])
            args = [
                os.path.join(teuth_bin_path, 'teuthology-results'),
                '--timeout',
                str(
                    job_config.get('results_timeout',
                                   teuth_config.results_timeout)),
                '--email',
                job_config['email'],
                '--archive-dir',
                os.path.join(ctx.archive_dir, safe_archive),
                '--name',
                job_config['name'],
            ]
            # Execute teuthology-results, passing 'preexec_fn=os.setpgrp' to
            # make sure that it will continue to run if this worker process
            # dies (e.g. because of a restart)
            result_proc = subprocess.Popen(args=args, preexec_fn=os.setpgrp)
            log.info("teuthology-results PID: %s", result_proc.pid)
        else:
            log.info('Creating archive dir %s', archive_path_full)
            safepath.makedirs(ctx.archive_dir, safe_archive)
            log.info('Running job %d', job.jid)
            run_job(job_config, teuth_bin_path)

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError. The job has already been
        # handled, so a failed delete is logged instead of being fatal.
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #18
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. The process exits via ``sys.exit`` when
    ``ctx.archive_dir`` is not an existing directory.

    For every reserved job the requested teuthology branch and qa suite
    are fetched. A ``BranchNotFoundError`` causes the job to be reported
    with status ``dead`` and skipped. Jobs flagged ``last_in_suite`` start
    ``teuthology-results`` in the background; all other jobs are executed
    through ``run_job``. The loop never terminates on its own.

    :raises RuntimeError: when the fetched branch has no ``virtualenv/bin``
                          directory.
    """
    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)

    log_file_path = os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
        pid=os.getpid(), tube=ctx.tube,))
    setup_log_file(log_file_path)

    install_except_hook()

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))
    else:
        teuth_config.archive_base = ctx.archive_dir

    read_config(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    # Handle of the most recently started teuthology-results child, if any.
    result_proc = None

    fetch_teuthology('master')
    fetch_qa_suite('master')

    while True:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if need_restart():
            restart()

        # reserve() returns None when nothing arrived within the timeout.
        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        log.info('Reserved job %d', job.jid)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)

        # Augment the queued config with per-job bookkeeping values.
        job_config['job_id'] = str(job.jid)
        safe_archive = safepath.munge(job_config['name'])
        job_config['worker_log'] = log_file_path
        archive_path_full = os.path.join(
            ctx.archive_dir, safe_archive, str(job.jid))
        job_config['archive_path'] = archive_path_full

        # If the teuthology branch was not specified, default to master and
        # store that value.
        teuthology_branch = job_config.get('teuthology_branch', 'master')
        job_config['teuthology_branch'] = teuthology_branch

        try:
            teuth_path = fetch_teuthology(branch=teuthology_branch)
            # For the teuthology tasks, we look for suite_branch, and if we
            # don't get that, we look for branch, and fall back to 'master'.
            # last-in-suite jobs don't have suite_branch or branch set.
            ceph_branch = job_config.get('branch', 'master')
            suite_branch = job_config.get('suite_branch', ceph_branch)
            job_config['suite_path'] = fetch_qa_suite(suite_branch)
        except BranchNotFoundError as exc:
            log.exception("Branch not found; marking job as dead")
            report.try_push_job_info(
                job_config,
                dict(status='dead', failure_reason=str(exc))
            )
            continue

        teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
        if not os.path.isdir(teuth_bin_path):
            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                               (teuthology_branch, teuth_bin_path))

        if job_config.get('last_in_suite'):
            if teuth_config.results_server:
                report.try_delete_jobs(job_config['name'],
                                       job_config['job_id'])
            log.info('Generating results email for %s', job_config['name'])
            args = [
                os.path.join(teuth_bin_path, 'teuthology-results'),
                '--timeout',
                str(job_config.get('results_timeout',
                                   teuth_config.results_timeout)),
                '--email',
                job_config['email'],
                '--archive-dir',
                os.path.join(ctx.archive_dir, safe_archive),
                '--name',
                job_config['name'],
            ]
            # Execute teuthology-results, passing 'preexec_fn=os.setpgrp' to
            # make sure that it will continue to run if this worker process
            # dies (e.g. because of a restart)
            result_proc = subprocess.Popen(args=args, preexec_fn=os.setpgrp)
            log.info("teuthology-results PID: %s", result_proc.pid)
        else:
            log.info('Creating archive dir %s', archive_path_full)
            safepath.makedirs(ctx.archive_dir, safe_archive)
            log.info('Running job %d', job.jid)
            run_job(job_config, teuth_bin_path, ctx.verbose)

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError. The job has already been
        # handled, so a failed delete is logged instead of being fatal.
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #19
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. Every reserved job is buried, prepared by
    ``prep_job`` and executed by ``run_job``. If either raises
    ``SkipJob`` the job is left buried and the loop moves on. A job whose
    config carries a truthy ``stop_worker`` makes this the final iteration.
    """
    loglevel = logging.DEBUG if ctx.verbose else logging.INFO
    log.setLevel(loglevel)

    log_dir = ctx.log_dir
    log_file_path = os.path.join(
        log_dir,
        'worker.{tube}.{pid}'.format(pid=os.getpid(), tube=ctx.tube),
    )
    setup_log_file(log_file_path)

    install_except_hook()

    load_config(ctx=ctx)

    set_config_attr(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    result_proc = None

    # Only fetch teuthology when no fixed path is configured; the qa suite
    # is fetched unconditionally.
    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # Collect the return code of a finished teuthology-results process,
        # if one is being tracked, so that it can exit.
        if not (result_proc is None or result_proc.poll() is None):
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()

        job = connection.reserve(timeout=60)
        if job is None:
            # Timed out without a job; poll again.
            continue

        # Bury the job so it will not be re-run if it fails.
        job.bury()
        reserved_id = job.jid
        log.info('Reserved job %d', reserved_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(reserved_id)

        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config, log_file_path, ctx.archive_dir,
            )
            run_job(
                job_config, teuth_bin_path, ctx.archive_dir, ctx.verbose,
            )
        except SkipJob:
            continue

        # Deleting is best-effort: a SocketError from beanstalkc must not
        # take the worker down.
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #20
0
def main(ctx):
    """Worker entry point: reserve jobs from a beanstalk tube and run them.

    Reads ``ctx.verbose``, ``ctx.log_dir``, ``ctx.tube`` and
    ``ctx.archive_dir``. Logging goes to ``worker.<tube>.<pid>`` in
    ``ctx.log_dir``. The process exits via ``sys.exit`` when
    ``ctx.archive_dir`` is not an existing directory.

    For every reserved job the teuthology branch named in the job config
    (default ``master``) is fetched into ``$HOME/teuthology-<branch>``. A
    ``BranchNotFoundError`` causes the job to be removed through
    ``report.try_delete_jobs`` and skipped. Jobs flagged ``last_in_suite``
    start ``teuthology-results`` in the background; all other jobs are
    executed through ``run_job``. The loop never terminates on its own.

    :raises RuntimeError: when the fetched branch has no ``virtualenv/bin``
                          directory.
    """
    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)

    log_file_path = os.path.join(ctx.log_dir, 'worker.{tube}.{pid}'.format(
        pid=os.getpid(), tube=ctx.tube,))
    setup_log_file(log, log_file_path)

    install_except_hook()

    if not os.path.isdir(ctx.archive_dir):
        sys.exit("{prog}: archive directory must exist: {path}".format(
            prog=os.path.basename(sys.argv[0]),
            path=ctx.archive_dir,
        ))
    else:
        teuth_config.archive_base = ctx.archive_dir

    read_config(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    # Handle of the most recently started teuthology-results child, if any.
    result_proc = None

    while True:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit. Polling the
        # handle reaps the child without setting SIGCHLD to SIG_IGN, which
        # would apply process-wide and discard the exit status of every
        # later child process as well.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if need_restart():
            restart()

        # reserve() returns None when nothing arrived within the timeout.
        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        log.info('Reserved job %d', job.jid)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)

        # Augment the queued config with per-job bookkeeping values.
        job_config['job_id'] = str(job.jid)
        safe_archive = safepath.munge(job_config['name'])
        job_config['worker_log'] = log_file_path
        archive_path_full = os.path.join(
            ctx.archive_dir, safe_archive, str(job.jid))
        job_config['archive_path'] = archive_path_full

        # If the teuthology branch was not specified, default to master and
        # store that value.
        teuthology_branch = job_config.get('teuthology_branch', 'master')
        job_config['teuthology_branch'] = teuthology_branch

        teuth_path = os.path.join(os.getenv("HOME"),
                                  'teuthology-' + teuthology_branch)

        try:
            fetch_teuthology_branch(path=teuth_path, branch=teuthology_branch)
        except BranchNotFoundError:
            log.exception(
                "Branch not found; throwing job away")
            # Optionally, we could mark the job as dead, but we don't have a
            # great way to express why it is dead.
            report.try_delete_jobs(job_config['name'],
                                   job_config['job_id'])
            continue

        teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
        if not os.path.isdir(teuth_bin_path):
            raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                               (teuthology_branch, teuth_bin_path))

        if job_config.get('last_in_suite'):
            if teuth_config.results_server:
                report.try_delete_jobs(job_config['name'],
                                       job_config['job_id'])
            log.info('Generating results email for %s', job_config['name'])
            args = [
                os.path.join(teuth_bin_path, 'teuthology-results'),
                '--timeout',
                str(job_config.get('results_timeout', 21600)),
                '--email',
                job_config['email'],
                '--archive-dir',
                os.path.join(ctx.archive_dir, safe_archive),
                '--name',
                job_config['name'],
            ]
            # Execute teuthology-results, passing 'preexec_fn=os.setpgrp' to
            # make sure that it will continue to run if this worker process
            # dies (e.g. because of a restart). The handle is kept so the
            # child can be reaped at the top of a later loop iteration.
            result_proc = subprocess.Popen(args=args, preexec_fn=os.setpgrp)
            log.info("teuthology-results PID: %s", result_proc.pid)
        else:
            log.info('Creating archive dir %s', archive_path_full)
            safepath.makedirs(ctx.archive_dir, safe_archive)
            log.info('Running job %d', job.jid)
            run_job(job_config, teuth_bin_path)

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError. The job has already been
        # handled, so a failed delete is logged instead of being fatal.
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")