Ejemplo n.º 1
0
def fetch_logs(job, max_idle_time, log_follower) -> None:
    """Run one polling step: fetch new job logs, parse them and print them.

    Args:
        job: Job object providing ``last_log_time``, ``job_id``,
            ``get_logs()``, ``heartbeat()`` and
            ``parse_job_result_from_log()``.
        max_idle_time: ``timedelta`` of tolerated silence since
            ``job.last_log_time``.
        log_follower: Object whose ``feed()`` receives the raw log lines and
            whose ``flush()`` returns the lines to be parsed and printed.

    Raises:
        MesaCITimeoutError: The job has been silent for longer than
            ``max_idle_time``.
        MesaCIParseException: Fetching the logs failed with a parse error on
            five consecutive attempts.
    """
    # A prolonged period of silence is taken to mean that the device has
    # died, so bail out and let the job be tried again.
    silence = datetime.now() - job.last_log_time
    if silence > max_idle_time:
        idle_minutes = max_idle_time.total_seconds() / 60
        timeout_message = (
            f"{CONSOLE_LOG['BOLD']}"
            f"{CONSOLE_LOG['FG_YELLOW']}"
            f"LAVA job {job.job_id} does not respond for {idle_minutes} "
            "minutes. Retry."
            f"{CONSOLE_LOG['RESET']}"
        )
        raise MesaCITimeoutError(
            timeout_message,
            timeout_duration=max_idle_time,
        )

    time.sleep(LOG_POLLING_TIME_SEC)

    # The XMLRPC binary packet may be corrupted, causing a YAML scanner error.
    # Give the fetch a handful of chances before exposing the error.
    for _attempt in range(5):
        try:
            fresh_lines = job.get_logs()
        except MesaCIParseException:
            continue
        break
    else:
        raise MesaCIParseException

    # Non-empty log data is proof that the device is still alive.
    got_data = log_follower.feed(fresh_lines)
    if got_data:
        job.heartbeat()

    flushed_lines = log_follower.flush()
    for entry in job.parse_job_result_from_log(flushed_lines):
        print_log(entry)
Ejemplo n.º 2
0
def print_job_final_status(job):
    """Print the final status line of ``job``, colored by its status.

    A job still reported as "running" at this point is relabeled "hung"
    (``job.status`` is modified in place). Statuses missing from
    ``LAVAJob.COLOR_STATUS_MAP`` are printed in red.
    """
    still_running = job.status == "running"
    if still_running:
        job.status = "hung"

    status_colors = LAVAJob.COLOR_STATUS_MAP
    color = status_colors.get(job.status, CONSOLE_LOG["FG_RED"])
    message = (
        f"{color}"
        f"LAVA Job finished with status: {job.status}"
        f"{CONSOLE_LOG['RESET']}"
    )
    print_log(message)
Ejemplo n.º 3
0
def retriable_follow_job(proxy, job_definition) -> LAVAJob:
    """Follow a LAVA job to completion, resubmitting it when it fails.

    A fresh ``LAVAJob`` is created for every attempt; the first attempt plus
    ``NUMBER_OF_RETRIES_TIMEOUT_DETECTION`` retries are made.

    Returns:
        The job of the first attempt that ``follow_job_execution`` completed
        without raising.

    Raises:
        KeyboardInterrupt: Re-raised after cancelling the current job.
        MesaCIRetryError: Every attempt ended in a ``MesaCIException``.
    """
    retry_count = NUMBER_OF_RETRIES_TIMEOUT_DETECTION
    total_attempts = retry_count + 1

    for attempt_no in range(1, total_attempts + 1):
        job = LAVAJob(proxy, job_definition)
        try:
            follow_job_execution(job)
        except MesaCIKnownIssueException as known_issue:
            # Known issues only mark the job as canceled locally;
            # job.cancel() is not called for them.
            print_log(known_issue)
            job.status = "canceled"
        except MesaCIException as ci_error:
            print_log(ci_error)
            job.cancel()
        except KeyboardInterrupt as interrupt:
            print_log(
                "LAVA job submitter was interrupted. Cancelling the job.")
            job.cancel()
            raise interrupt
        else:
            # The ``finally`` clause below still runs before returning.
            return job
        finally:
            attempt_banner = (
                f"{CONSOLE_LOG['BOLD']}"
                f"Finished executing LAVA job in the attempt #{attempt_no}"
                f"{CONSOLE_LOG['RESET']}"
            )
            print_log(attempt_banner)
            print_job_final_status(job)

    failure_message = (
        f"{CONSOLE_LOG['BOLD']}"
        f"{CONSOLE_LOG['FG_RED']}"
        "Job failed after it exceeded the number of "
        f"{retry_count} retries."
        f"{CONSOLE_LOG['RESET']}"
    )
    raise MesaCIRetryError(failure_message, retry_count=retry_count)
Ejemplo n.º 4
0
def setup_lava_proxy():
    """Create an XML-RPC server proxy from the "default" lavacli config.

    The username and token from the config are embedded in the server URI,
    and the proxy/timeout/SSL settings are handed to a
    ``lavacli.RequestsTransport``.

    Returns:
        The configured ``xmlrpc.client.ServerProxy``.
    """
    config = lavacli.load_config("default")
    uri = config.get("uri")
    usr = config.get("username")
    tok = config.get("token")

    parsed_uri = urllib.parse.urlparse(uri)
    scheme = parsed_uri.scheme
    # Credentials go into the userinfo part of the URI: scheme://user:token@host/path
    authenticated_uri = (
        f"{scheme}://{usr}:{tok}@{parsed_uri.netloc}{parsed_uri.path}"
    )

    transport = lavacli.RequestsTransport(
        scheme,
        config.get("proxy"),
        config.get("timeout", 120.0),
        config.get("verify_ssl_cert", True),
    )
    proxy = xmlrpc.client.ServerProxy(
        authenticated_uri,
        allow_none=True,
        transport=transport,
    )

    # Only the credential-free URI from the config is logged.
    print_log(f"Proxy for {config['uri']} created.")

    return proxy
Ejemplo n.º 5
0
def follow_job_execution(job):
    """Submit ``job``, wait for it to start and stream its logs until done.

    Steps, in order: submit the job, poll until ``job.is_started()``, open a
    collapsed "LAVA boot" log section, repeatedly call ``fetch_logs`` until
    ``job.is_finished`` is true, then show the job data. If the final status
    is neither "pass" nor "fail", ``find_lava_error`` is invoked.

    Raises:
        MesaCIException: Submitting the job failed; the original exception is
            chained as the cause. Exceptions raised by ``fetch_logs`` (for
            example its timeout error) are not caught here and propagate.
    """
    try:
        job.submit()
    except Exception as mesa_ci_err:
        # Any submission failure is rewrapped as a MesaCIException.
        raise MesaCIException(
            f"Could not submit LAVA job. Reason: {mesa_ci_err}"
        ) from mesa_ci_err

    print_log(f"Waiting for job {job.job_id} to start.")
    while not job.is_started():
        time.sleep(WAIT_FOR_DEVICE_POLLING_TIME_SEC)
    print_log(f"Job {job.job_id} started.")

    gl = GitlabSection(
        id="lava_boot",
        header="LAVA boot",
        type=LogSectionType.LAVA_BOOT,
        start_collapsed=True,
    )
    print(gl.start())
    # Computed once; the original assigned this same value a second time
    # inside the ``with`` block, which was a redundant dead store.
    max_idle_time = timedelta(seconds=DEVICE_HANGING_TIMEOUT_SEC)
    with LogFollower(current_section=gl) as lf:
        # Start to check job's health
        job.heartbeat()
        while not job.is_finished:
            fetch_logs(job, max_idle_time, lf)

    show_job_data(job)

    # Mesa Developers expect to have a simple pass/fail job result.
    # If this does not happen, it probably means a LAVA infrastructure error
    # happened.
    if job.status not in ["pass", "fail"]:
        find_lava_error(job)
Ejemplo n.º 6
0
    job_definition = generate_lava_yaml(args)

    if args.dump_yaml:
        with GitlabSection(
                "yaml_dump",
                "LAVA job definition (YAML)",
                type=LogSectionType.LAVA_BOOT,
                start_collapsed=True,
        ):
            print(hide_sensitive_data(job_definition))
    job = LAVAJob(proxy, job_definition)

    if errors := job.validate():
        fatal_err(f"Error in LAVA job definition: {errors}")
    print_log("LAVA job definition validated successfully")

    if args.validate_only:
        return

    finished_job = retriable_follow_job(proxy, job_definition)
    exit_code = 0 if finished_job.status == "pass" else 1
    sys.exit(exit_code)


def create_parser():
    parser = argparse.ArgumentParser("LAVA job submitter")

    parser.add_argument("--pipeline-info")
    parser.add_argument("--rootfs-url-prefix")
    parser.add_argument("--kernel-url-prefix")