Exemple #1
0
def fetch_commit(repo_dir, repo_url, commit_sha, depth=1):
    """
    Fetch `commit_sha` from `repo_url` into the repo at `repo_dir`, retrying
    when a recognised network error occurs

    After a successful fetch the commit is recorded as fetched via
    `mark_commmit_as_fetched`. Raises `GitError` if the fetch fails with an
    unrecognised error, or if recognised network errors persist for every
    attempt.
    """
    # The retry logic exists because fetches in the live environment
    # occasionally fail with errors such as:
    #
    #   error: RPC failed; curl 56 GnuTLS recv error (-9): ...
    #
    # These are mostly transient, although some commits seem to provoke them
    # more than others. Background:
    # https://github.com/opensafely/job-runner/issues/5
    retryable_markers = (
        b"GnuTLS recv error",
        b"SSL_read: Connection was reset",
    )
    max_retries = 5
    delay = 4
    authenticated_url = add_access_token_and_proxy(repo_url)
    for attempt in range(1, max_retries + 1):
        try:
            # The argument list is rebuilt for every attempt
            fetch_cmd = [
                "git",
                "fetch",
                "--force",
                "--depth",
                str(depth),
                authenticated_url,
                commit_sha,
            ]
            subprocess_run(
                fetch_cmd,
                check=True,
                capture_output=True,
                cwd=repo_dir,
                env=NEVER_PROMPT_FOR_AUTH_ENV,
            )
            mark_commmit_as_fetched(repo_dir, commit_sha)
            return
        except subprocess.SubprocessError as e:
            redact_token_from_exception(e)
            log.exception(
                f"Error fetching commit (attempt {attempt}/{max_retries})")
            is_network_error = any(
                marker in e.stderr for marker in retryable_markers)
            if not is_network_error:
                raise GitError(
                    f"Error fetching commit {commit_sha} from {repo_url}")
            if attempt == max_retries:
                # That was the final attempt, so give up
                raise GitError(
                    f"Network error when fetching commit {commit_sha} from"
                    f" {repo_url}\n"
                    "(This may work if you try again later)")
            # Wait before the next attempt, doubling the wait each time
            time.sleep(delay)
            delay *= 2
Exemple #2
0
def docker_preflight_check():
    """
    Check that `docker info` runs successfully, printing guidance if not

    Returns True when the command succeeds. If the executable cannot be found,
    or the command exits with an error, an explanation is printed and False is
    returned.
    """
    try:
        subprocess_run(["docker", "info"], check=True, capture_output=True)
    except FileNotFoundError:
        messages = (
            "Could not find application: docker",
            "\nYou must have Docker installed to run this command, see:",
            "https://docs.docker.com/get-docker/",
        )
    except subprocess.CalledProcessError:
        messages = (
            "There was an error running: docker info",
            "\nIt looks like you have Docker installed but have not started it.",
        )
    else:
        return True
    for message in messages:
        print(message)
    return False
Exemple #3
0
def commit_is_ancestor(repo_dir, ancestor_sha, descendant_sha):
    """
    Return whether `git merge-base --is-ancestor ANCESTOR DESCENDANT`, run in
    `repo_dir`, exits with status zero

    Any non-zero exit status gives False; no exception is raised for it here
    as the command is run without `check=True`.
    """
    cmd = ["git", "merge-base", "--is-ancestor", ancestor_sha, descendant_sha]
    result = subprocess_run(cmd, cwd=repo_dir, capture_output=True)
    exit_status = result.returncode
    return exit_status == 0
Exemple #4
0
def commit_already_fetched(repo_dir, commit_sha):
    """
    Return whether the given commit has been completely fetched into the repo
    at `repo_dir`

    This used to be checked with:

        git cat-file -e 'COMMIT_SHA^{commit}'

    But an interrupted fetch can leave the commit object present without all
    of its associated blobs, in which case that check passes even though
    checking out the commit will fail. Instead, a special "sentinel" tag is
    created for each commit once the whole fetch process has completed
    successfully, and it is the presence of that tag which is tested here.
    """
    sentinel_tag = SENTINEL_TAG_PREFIX + commit_sha
    # Ask git to print the fixed word "exists" for the sentinel tag, but only
    # if that tag points at the commit in question
    list_cmd = [
        "git",
        "tag",
        "--list",
        sentinel_tag,
        "--points-at",
        commit_sha,
        "--format",
        "exists",
    ]
    result = subprocess_run(
        list_cmd, check=True, capture_output=True, cwd=repo_dir
    )
    printed = result.stdout.strip()
    return printed == b"exists"
Exemple #5
0
def mark_commmit_as_fetched(repo_dir, commit_sha):
    """
    Tag the supplied commit with a special "sentinel" tag, named
    `SENTINEL_TAG_PREFIX + commit_sha`, to record that it has been fully
    fetched

    The tag is created with `--force`, so an existing tag of the same name is
    replaced.
    """
    sentinel_tag = SENTINEL_TAG_PREFIX + commit_sha
    tag_cmd = ["git", "tag", "--force", sentinel_tag, commit_sha]
    subprocess_run(tag_cmd, check=True, capture_output=True, cwd=repo_dir)
Exemple #6
0
def delete_docker_entities(entity, label, ignore_errors=False):
    """
    Force-remove every docker entity of the given type which carries `label`

    The matching IDs are listed with `docker <entity> ls` and then passed to
    `docker <entity> rm --force`. When `ignore_errors` is true neither command
    is run with `check=True`, and nothing is removed if the listing command
    exits with a non-zero status.
    """
    must_succeed = not ignore_errors
    # Stopped containers are only listed when `--all` is supplied
    all_flag = "--all" if entity == "container" else None
    candidate_args = (
        "docker",
        entity,
        "ls",
        all_flag,
        "--filter",
        f"label={label}",
        "--quiet",
    )
    # Drop any empty arguments (such as the unused `--all` placeholder)
    list_cmd = [arg for arg in candidate_args if arg]
    listing = subprocess_run(
        list_cmd,
        capture_output=True,
        encoding="ascii",
        check=must_succeed,
    )
    matching_ids = listing.stdout.split()
    if not matching_ids or listing.returncode != 0:
        return
    remove_cmd = ["docker", entity, "rm", "--force", *matching_ids]
    subprocess_run(remove_cmd, capture_output=True, check=must_succeed)
Exemple #7
0
 def git_clone(repo_url, cwd):
     """
     Shallow-clone `repo_url` into a directory named "repo" beneath `cwd`,
     returning whether the clone command exited with status zero
     """
     # With GIT_TERMINAL_PROMPT=0 the clone fails, rather than prompting, if
     # authentication is required. Given an https url, linux/mac would
     # generally prompt; failing instead allows a retry with an ssh url.
     no_prompt_env = dict(os.environ, GIT_TERMINAL_PROMPT="0")
     clone = subprocess_run(
         ["git", "clone", "--depth=1", repo_url, "repo"],
         cwd=cwd,
         capture_output=True,
         env=no_prompt_env,
     )
     succeeded = clone.returncode == 0
     return succeeded
Exemple #8
0
def checkout_commit(repo_url, commit_sha, target_dir):
    """
    Write the contents of `repo_url` as of `commit_sha` into `target_dir`,
    creating the directory if it doesn't already exist
    """
    repo_dir = get_local_repo_dir(repo_url)
    ensure_commit_fetched(repo_dir, repo_url, commit_sha)
    os.makedirs(target_dir, exist_ok=True)
    # The repo is identified through GIT_DIR, rather than by changing the
    # working directory, so that `target_dir` gets correctly resolved
    git_env = dict(os.environ, GIT_DIR=repo_dir)
    checkout_cmd = [
        "git",
        f"--work-tree={target_dir}",
        "checkout",
        "--quiet",
        "--force",
        commit_sha,
    ]
    subprocess_run(checkout_cmd, check=True, env=git_env)
def get_container_stats():
    """
    Return a dict mapping container name to its CPU and memory usage, as
    reported by a single (non-streaming) run of `docker stats`

    Each value is a dict with keys `cpu_percentage` (the `CPUPerc` field as a
    float, with any trailing "%" removed) and `memory_used` (the first
    whitespace-separated token of the `MemUsage` field, passed through
    `_parse_size`).
    """
    stats_cmd = ["docker", "stats", "--no-stream", "--format", "{{json .}}"]
    result = subprocess_run(stats_cmd, capture_output=True, check=True)
    # Each line of output is parsed as its own JSON document
    rows = list(map(json.loads, result.stdout.splitlines()))
    stats = {}
    for row in rows:
        name = row["Name"]
        cpu_text = row["CPUPerc"].rstrip("%")
        memory_text = row["MemUsage"].split()[0]
        stats[name] = {
            "cpu_percentage": float(cpu_text),
            "memory_used": _parse_size(memory_text),
        }
    return stats
Exemple #10
0
def get_sha_from_remote_ref(repo_url, ref):
    """Looks up the commit SHA which a ref points to in a remote repo.

    Args:
        repo_url: URL of the remote repo.
        ref: Name of a ref, e.g. a branch or a tag.

    Returns:
        The commit SHA. Where the ref is an annotated tag this is the SHA of the
        commit it refers to, not the SHA of the tag object itself.

    Raises:
        GitRepoNotReachableError: The remote repo could not be read
        GitUnknownRefError: The ref could not be found in the remote repo
    """
    # For an annotated tag, the `^{}` form of the ref matches the commit which
    # the tag refers to.
    deref_ref = f"{ref}^{{}}"
    try:
        ls_remote_cmd = [
            "git",
            "ls-remote",
            "--quiet",
            add_access_token_and_proxy(repo_url),
            ref,
            deref_ref,
        ]
        output = subprocess_run(
            ls_remote_cmd,
            check=True,
            capture_output=True,
            text=True,
            encoding="utf-8",
            env=NEVER_PROMPT_FOR_AUTH_ENV,
        ).stdout
    except subprocess.SubprocessError as exc:
        redact_token_from_exception(exc)
        log.exception("Error reading from remote repository")
        raise GitRepoNotReachableError(f"Could not read from {repo_url}")
    results = _parse_ls_remote_output(output)
    # Candidate names in order of preference; the first one present wins
    candidates = (
        ref,  # e.g. HEAD
        f"refs/heads/{ref}",  # Branch
        f"refs/tags/{deref_ref}",  # Annotated tag
        f"refs/tags/{ref}",  # Lightweight tag
    )
    matches = [name for name in candidates if name in results]
    if not matches:
        raise GitUnknownRefError(f"Could not find ref '{ref}' in {repo_url}")
    return results[matches[0]]
def get_volume_and_container_sizes():
    """
    Return a pair of dicts `(volumes, containers)` giving sizes as reported by
    `docker system df --verbose`

    Volumes are keyed by their `Name` field and containers by their `Names`
    field; each value is the entry's `Size` field passed through
    `_parse_size`.
    """

    def sizes_keyed_by(key, rows):
        # Map each row's identifying field to its parsed size
        return {row[key]: _parse_size(row["Size"]) for row in rows}

    df_cmd = ["docker", "system", "df", "--verbose", "--format", "{{json .}}"]
    result = subprocess_run(df_cmd, capture_output=True, check=True)
    report = json.loads(result.stdout)
    volumes = sizes_keyed_by("Name", report["Volumes"])
    containers = sizes_keyed_by("Names", report["Containers"])
    return volumes, containers
Exemple #12
0
def docker(docker_args, timeout=DEFAULT_TIMEOUT, **kwargs):
    """
    Run `docker` with the supplied list of arguments and return the result of
    `subprocess_run`

    Any extra keyword arguments are passed through to `subprocess_run`.

    Raises `DockerTimeoutError` if the command times out, and
    `DockerDiskSpaceError` if it exits with status 1 and its output shows the
    daemon reporting no space left on the device. Any other
    `CalledProcessError` is re-raised unchanged.
    """
    cmd = ["docker"] + docker_args
    try:
        return subprocess_run(cmd, timeout=timeout, **kwargs)
    except subprocess.TimeoutExpired as e:
        raise DockerTimeoutError from e
    except subprocess.CalledProcessError as e:
        # Prefer stderr, falling back to stdout if stderr wasn't captured
        output = e.stdout if e.stderr is None else e.stderr
        if isinstance(output, bytes):
            output = output.decode("utf8", "ignore")
        out_of_disk_space = (
            output is not None
            and e.returncode == 1
            and "Error response from daemon:" in output
            and ": no space left on device" in output
        )
        if not out_of_disk_space:
            raise
        raise DockerDiskSpaceError from e
Exemple #13
0
def license_repo(tmp_path):
    """
    Create a git repo under `tmp_path` containing a single committed
    `stata.lic` file, and return the path to the repo as a string

    The repo can then be used as a source to clone the license from.
    """
    # create a repo to clone the license from
    repo = tmp_path / "test-repo"
    repo.mkdir()
    license_file = repo / "stata.lic"
    license_file.write_text("repo-license")
    # The committer identity is supplied on the command line via `-c`, since
    # the global git config is disabled below. Each `-c` value must take the
    # form `name=value`.
    git = ["git", "-c", "user.name=test", "-c", "user.email=test@example.com"]
    env = {"GIT_CONFIG_GLOBAL": "/dev/null"}
    repo_path = str(repo)
    subprocess_run(git + ["init"], cwd=repo_path, env=env)
    subprocess_run(git + ["add", "stata.lic"], cwd=repo_path, env=env)
    subprocess_run(
        git + ["commit", "--no-gpg-sign", "-m", "test"], cwd=repo_path, env=env
    )
    return repo_path
Exemple #14
0
def find_docker_entities(entity, label):
    """
    List the docker entities of the specified type which carry `label`,
    returning the whitespace-separated tokens of the command's output
    """
    is_container = entity == "container"
    cmd = ["docker", entity, "ls"]
    if is_container:
        cmd.append("--all")
    # Containers are formatted using `.Names`, everything else using `.Name`
    name_template = "{{ .Names }}" if is_container else "{{ .Name }}"
    cmd += [
        "--filter",
        f"label={label}",
        "--format",
        name_template,
        "--quiet",
    ]
    listing = subprocess_run(cmd, capture_output=True, encoding="ascii")
    return listing.stdout.split()
Exemple #15
0
def read_file_from_repo(repo_url, commit_sha, path):
    """
    Read the file at `path` in `repo_url` as of `commit_sha` and return its
    contents as bytes

    Raises `GitFileNotFoundError` if git reports that the path does not exist,
    and `GitError` if the read fails for any other reason.
    """
    repo_dir = get_local_repo_dir(repo_url)
    ensure_commit_fetched(repo_dir, repo_url, commit_sha)
    show_cmd = ["git", "show", f"{commit_sha}:{path}"]
    try:
        result = subprocess_run(
            show_cmd, capture_output=True, check=True, cwd=repo_dir
        )
    except subprocess.SubprocessError as e:
        path_missing = (
            e.stderr.startswith(b"fatal: path ")
            and b"does not exist" in e.stderr
        )
        if path_missing:
            raise GitFileNotFoundError(
                f"File '{path}' not found in repository")
        log.exception(f"Error reading from {repo_url} @ {commit_sha}")
        raise GitError(f"Error reading from {repo_url} @ {commit_sha}")
    # The contents are deliberately left as bytes rather than text: git has no
    # way of knowing which encoding the file is supposed to have
    return result.stdout
Exemple #16
0
def ensure_docker_images_present(*images):
    """
    Pull each of the named images from `config.DOCKER_REGISTRY`, skipping any
    which `docker.image_exists_locally` reports as already present
    """
    for image in images:
        full_image = f"{config.DOCKER_REGISTRY}/{image}"
        if docker.image_exists_locally(full_image):
            continue
        pull_cmd = ["docker", "pull", "--quiet", full_image]
        subprocess_run(pull_cmd, check=True)
Exemple #17
0
def ensure_git_init(repo_dir):
    """
    Initialise a bare git repo at `repo_dir`, unless a `config` file already
    exists directly inside that directory
    """
    config_path = repo_dir / "config"
    if os.path.exists(config_path):
        return
    init_cmd = ["git", "init", "--bare", "--quiet", repo_dir]
    subprocess_run(init_cmd, check=True)