Example #1
def prep_job(job_config, log_file_path, archive_dir):
    job_id = job_config['job_id']
    safe_archive = safepath.munge(job_config['name'])
    job_config['worker_log'] = log_file_path
    archive_path_full = os.path.join(archive_dir, safe_archive, str(job_id))
    job_config['archive_path'] = archive_path_full

    # If the teuthology branch was not specified, default to master and
    # store that value.
    teuthology_branch = job_config.get('teuthology_branch', 'master')
    job_config['teuthology_branch'] = teuthology_branch
    teuthology_sha1 = job_config.get('teuthology_sha1')
    if not teuthology_sha1:
        repo_url = build_git_url('teuthology', 'ceph')
        teuthology_sha1 = ls_remote(repo_url, teuthology_branch)
        if not teuthology_sha1:
            reason = "Teuthology branch {} not found; marking job as dead".format(
                teuthology_branch)
            log.error(reason)
            report.try_push_job_info(
                job_config, dict(status='dead', failure_reason=reason))
            raise SkipJob()
        log.info('Using teuthology sha1 %s', teuthology_sha1)

    try:
        if teuth_config.teuthology_path is not None:
            teuth_path = teuth_config.teuthology_path
        else:
            teuth_path = fetch_teuthology(branch=teuthology_branch,
                                          commit=teuthology_sha1)
        # For the teuthology tasks, we look for suite_branch, and if we
        # don't get that, we look for branch, and fall back to 'master'.
        # Last-in-suite jobs don't have suite_branch or branch set.
        ceph_branch = job_config.get('branch', 'master')
        suite_branch = job_config.get('suite_branch', ceph_branch)
        suite_sha1 = job_config.get('suite_sha1')
        suite_repo = job_config.get('suite_repo')
        if suite_repo:
            teuth_config.ceph_qa_suite_git_url = suite_repo
        job_config['suite_path'] = os.path.normpath(
            os.path.join(
                fetch_qa_suite(suite_branch, suite_sha1),
                job_config.get('suite_relpath', ''),
            ))
    except (BranchNotFoundError, CommitNotFoundError) as exc:
        log.exception("Requested version not found; marking job as dead")
        report.try_push_job_info(job_config,
                                 dict(status='dead', failure_reason=str(exc)))
        raise SkipJob()
    except MaxWhileTries as exc:
        log.exception("Failed to fetch or bootstrap; marking job as dead")
        report.try_push_job_info(job_config,
                                 dict(status='dead', failure_reason=str(exc)))
        raise SkipJob()

    teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
    if not os.path.isdir(teuth_bin_path):
        raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                           (teuthology_branch, teuth_bin_path))
    return job_config, teuth_bin_path
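
A minimal usage sketch of prep_job() (illustrative only; the config values and paths below are hypothetical, and the call assumes the imports and globals used by the example above are already in place):

example_config = {
    'job_id': 12345,            # hypothetical job id
    'name': 'example-run',      # hypothetical run name
    'teuthology_branch': 'master',
}
prepared_config, teuth_bin_path = prep_job(
    example_config,
    log_file_path='/tmp/worker.example.log',  # hypothetical path
    archive_dir='/tmp/archive',               # hypothetical path
)
# prepared_config now carries 'worker_log', 'archive_path', 'teuthology_branch'
# and 'suite_path'; teuth_bin_path points at the bootstrapped virtualenv's bin/.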
Example #2
def fetch_tasks_if_needed(job_config):
    """
    Fetch the suite repo (and include it in sys.path) so that we can use its
    tasks.

    Returns the suite_path. The existing suite_path is returned if the tasks
    can be imported; otherwise a new suite_path is determined and returned.
    """
    # Any scheduled job will already have the suite checked out and its
    # $PYTHONPATH set. We can check for this by looking for 'suite_path'
    # in its config.
    suite_path = job_config.get('suite_path')
    if suite_path:
        log.info("suite_path is set to %s; will attempt to use it", suite_path)
        if suite_path not in sys.path:
            sys.path.insert(1, suite_path)

    try:
        import tasks
        log.info("Found tasks at %s", os.path.dirname(tasks.__file__))
        # tasks found with the existing suite branch, return it
        return suite_path
    except ImportError:
        log.info("Tasks not found; will attempt to fetch")

    ceph_branch = job_config.get('branch', 'master')
    suite_repo = job_config.get('suite_repo')
    if suite_repo:
        teuth_config.ceph_qa_suite_git_url = suite_repo
    suite_branch = job_config.get('suite_branch', ceph_branch)
    suite_sha1 = job_config.get('suite_sha1')
    suite_path = os.path.normpath(
        os.path.join(
            fetch_qa_suite(suite_branch, commit=suite_sha1),
            job_config.get('suite_relpath', ''),
        ))
    sys.path.insert(1, suite_path)
    return suite_path
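
A usage sketch for fetch_tasks_if_needed() (illustrative; the suite_path and branch values below are hypothetical, not taken from the source):

example_config = {
    'suite_path': '/home/worker/src/ceph-qa-suite_master',  # hypothetical path
    'suite_branch': 'master',
}
suite_path = fetch_tasks_if_needed(example_config)
# If 'tasks' was importable with the existing suite_path, that path is returned
# as-is; otherwise the suite repo is fetched, added to sys.path, and the new
# path is returned.
log.info("Using suite tasks from %s", suite_path)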
Example #3
def fetch_repos(branch, test_name):
    """
    Fetch the suite repo (and also the teuthology repo) so that we can use it
    to build jobs. Repos are stored in ~/src/.

    The teuthology repo is also fetched because we currently schedule jobs by
    calling teuthology-schedule via subprocess, so that checkout needs to be
    up-to-date. For that reason we always fetch the master branch for test
    scheduling, regardless of which teuthology branch is requested for
    testing.

    :returns: The path to the suite repo on disk
    """
    try:
        # When a user is scheduling a test run from their own copy of
        # teuthology, let's not wreak havoc on it.
        if config.automated_scheduling:
            # We use teuthology's master branch in all cases right now
            if config.teuthology_path is None:
                fetch_teuthology('master')
        suite_repo_path = fetch_qa_suite(branch)
    except BranchNotFoundError as exc:
        schedule_fail(message=str(exc), name=test_name)
    return suite_repo_path
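
A usage sketch for fetch_repos() (the branch and run name are hypothetical examples):

suite_repo_path = fetch_repos('master', test_name='example-run')
log.info("Suite repo checked out at %s", suite_repo_path)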
Example #4
def main(ctx):
    loglevel = logging.INFO
    if ctx.verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)

    log_file_path = os.path.join(
        ctx.log_dir, 'worker.{tube}.{pid}'.format(
            pid=os.getpid(),
            tube=ctx.tube,
        ))
    setup_log_file(log_file_path)

    install_except_hook()

    load_config(ctx=ctx)

    set_config_attr(ctx)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, ctx.tube)
    result_proc = None

    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()

        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        job_id = job.jid
        log.info('Reserved job %d', job_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(job_id)

        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config,
                log_file_path,
                ctx.archive_dir,
            )
            run_job(
                job_config,
                teuth_bin_path,
                ctx.archive_dir,
                ctx.verbose,
            )
        except SkipJob:
            continue

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")
Example #5
def main(args):
    # run dispatcher in job supervisor mode if --supervisor is passed
    if args["--supervisor"]:
        return supervisor.main(args)

    verbose = args["--verbose"]
    tube = args["--tube"]
    log_dir = args["--log-dir"]
    archive_dir = args["--archive-dir"]

    if archive_dir is None:
        archive_dir = teuth_config.archive_base

    # Set up logging for the dispatcher in {log_dir}
    loglevel = logging.INFO
    if verbose:
        loglevel = logging.DEBUG
    log.setLevel(loglevel)
    log_file_path = os.path.join(log_dir, f"dispatcher.{tube}.{os.getpid()}")
    setup_log_file(log_file_path)
    install_except_hook()

    load_config(archive_dir=archive_dir)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, tube)
    result_proc = None

    if teuth_config.teuthology_path is None:
        fetch_teuthology('master')
    fetch_qa_suite('master')

    keep_running = True
    while keep_running:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()

        job = connection.reserve(timeout=60)
        if job is None:
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        job_id = job.jid
        log.info('Reserved job %d', job_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(job_id)

        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config,
                log_file_path,
                archive_dir,
            )
        except SkipJob:
            continue

        # lock machines but do not reimage them
        if 'roles' in job_config:
            job_config = lock_machines(job_config)

        run_args = [
            os.path.join(teuth_bin_path, 'teuthology-dispatcher'),
            '--supervisor',
            '-v',
            '--bin-path',
            teuth_bin_path,
            '--archive-dir',
            archive_dir,
        ]

        # Create the run archive directory (if it does not already exist) and
        # the job's archive directory
        create_job_archive(job_config['name'], job_config['archive_path'],
                           archive_dir)
        job_config_path = os.path.join(job_config['archive_path'],
                                       'orig.config.yaml')

        # Write initial job config in job archive dir
        with open(job_config_path, 'w') as f:
            yaml.safe_dump(job_config, f, default_flow_style=False)

        run_args.extend(["--job-config", job_config_path])

        try:
            job_proc = subprocess.Popen(run_args)
            log.info('Job supervisor PID: %s', job_proc.pid)
        except Exception:
            error_message = "Saw error while trying to spawn supervisor."
            log.exception(error_message)
            if 'targets' in job_config:
                nuke(supervisor.create_fake_context(job_config), True)
            report.try_push_job_info(
                job_config, dict(status='fail', failure_reason=error_message))

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")