def test_fetch_one_coord_with_no_deps(rule_runner: RuleRunner) -> None:
    """Fetching a coordinate with no dependencies yields exactly its own jar."""
    expected_digest = FileDigest(
        fingerprint="66fdef91e9739348df7a096aa384a5685f4e875584cce89386a7a47251c4d8e9",
        serialized_bytes_length=45024,
    )
    lockfile_entry = CoursierLockfileEntry(
        coord=HAMCREST_COORD,
        file_name="hamcrest-core-1.3.jar",
        direct_dependencies=Coordinates([]),
        dependencies=Coordinates([]),
        file_digest=expected_digest,
    )

    classpath_entry = rule_runner.request(ClasspathEntry, [lockfile_entry])
    assert classpath_entry.filenames == ("hamcrest-core-1.3.jar",)

    # The fetched jar's content digest must match what the lockfile declared.
    extracted = rule_runner.request(
        FileDigest,
        [ExtractFileDigest(classpath_entry.digest, "hamcrest-core-1.3.jar")],
    )
    assert extracted == expected_digest
def test_fetch_one_coord_with_transitive_deps(rule_runner: RuleRunner) -> None:
    """Even with transitive deps declared, fetching one coordinate yields only its own jar."""
    junit_coord = Coordinate(group="junit", artifact="junit", version="4.13.2")
    expected_digest = FileDigest(
        fingerprint="8e495b634469d64fb8acfa3495a065cbacc8a0fff55ce1e31007be4c16dc57d3",
        serialized_bytes_length=384581,
    )
    lockfile_entry = CoursierLockfileEntry(
        coord=junit_coord,
        file_name="junit-4.13.2.jar",
        direct_dependencies=Coordinates([HAMCREST_COORD]),
        dependencies=Coordinates([HAMCREST_COORD]),
        file_digest=expected_digest,
    )

    classpath_entry = rule_runner.request(ClasspathEntry, [lockfile_entry])
    assert classpath_entry.filenames == ("junit-4.13.2.jar",)

    # The fetched jar's content digest must match what the lockfile declared.
    extracted = rule_runner.request(
        FileDigest,
        [ExtractFileDigest(classpath_entry.digest, "junit-4.13.2.jar")],
    )
    assert extracted == expected_digest
# Example #3
def test_subset_with_multiple_files(rule_runner: RuleRunner) -> None:
    """Extracting with a glob that matches more than one file is an error."""
    digest = get_digest(rule_runner, {"foo.txt": "", "bar.txt": ""})
    with pytest.raises(Exception, match=r".*?found multiple times.*?"):
        # "*" matches both foo.txt and bar.txt, so extraction must fail.
        rule_runner.request(FileDigest, [ExtractFileDigest(digest=digest, file_path="*")])
# Example #4
def test_extract_missing_file(rule_runner: RuleRunner) -> None:
    """Extracting a path that does not exist in the digest is an error."""
    digest = get_digest(rule_runner, {"foo.txt": ""})
    with pytest.raises(Exception, match=r".*?not found in.*?"):
        rule_runner.request(FileDigest, [ExtractFileDigest(digest=digest, file_path="missing")])
# Example #5
def test_extract_empty_file(rule_runner: RuleRunner) -> None:
    """An empty file extracts to the canonical empty-file digest."""
    digest = get_digest(rule_runner, {"foo.txt": ""})
    extracted = rule_runner.request(
        FileDigest, [ExtractFileDigest(digest=digest, file_path="foo.txt")]
    )
    assert extracted == EMPTY_FILE_DIGEST
def test_fetch_one_coord_with_transitive_deps(rule_runner: RuleRunner) -> None:
    """Fetching one coordinate resolves intransitively: only junit's own jar comes back."""
    hamcrest = MavenCoord(coord="org.hamcrest:hamcrest-core:1.3")
    expected_digest = FileDigest(
        fingerprint="8e495b634469d64fb8acfa3495a065cbacc8a0fff55ce1e31007be4c16dc57d3",
        serialized_bytes_length=384581,
    )
    lockfile_entry = CoursierLockfileEntry(
        coord=MavenCoord(coord="junit:junit:4.13.2"),
        file_name="junit-4.13.2.jar",
        direct_dependencies=MavenCoordinates([hamcrest]),
        dependencies=MavenCoordinates([hamcrest]),
        file_digest=expected_digest,
    )

    classpath_entry = rule_runner.request(ResolvedClasspathEntry, [lockfile_entry])
    assert classpath_entry.coord == MavenCoord(coord="junit:junit:4.13.2")
    assert classpath_entry.file_name == "junit-4.13.2.jar"

    # The fetched jar's content digest must match what the lockfile declared.
    extracted = rule_runner.request(
        FileDigest,
        [ExtractFileDigest(classpath_entry.digest, "junit-4.13.2.jar")],
    )
    assert extracted == expected_digest
# Example #7
def test_extract_nonempty_file(rule_runner: RuleRunner) -> None:
    """Extracting a non-empty file yields its sha256 fingerprint and byte length."""
    digest = get_digest(rule_runner, {"foo.txt": "bar"})
    extracted = rule_runner.request(
        FileDigest,
        [ExtractFileDigest(digest=digest, file_path="foo.txt")],
    )
    # Expected digest: sha256 of the literal content b"bar" (3 bytes).
    expected_fingerprint = hashlib.sha256(b"bar").hexdigest()
    assert extracted == FileDigest(
        fingerprint=expected_fingerprint, serialized_bytes_length=3
    )
# Example #8
def test_fetch_one_coord_with_jar(rule_runner: RuleRunner) -> None:
    """A lockfile entry with a `pants_address` is satisfied from the local jar target,
    not from a remote repository."""
    coord = Coordinate(group="jeremy", artifact="jeremy", version="4.13.2")
    file_name = f"{coord.group}_{coord.artifact}_{coord.version}.jar"
    expected_digest = FileDigest(
        fingerprint="55b9afa8d7776cd6c318eec51f506e9c7f66c247dcec343d4667f5f269714f86",
        serialized_bytes_length=10,
    )

    # Materialize a jvm_artifact target backed by a local jar file.
    build_file = textwrap.dedent(f"""\
            jvm_artifact(
              name="jeremy",
              group="{coord.group}",
              artifact="{coord.artifact}",
              version="{coord.version}",
              jar="jeremy.jar",
            )
            """)
    rule_runner.write_files({"BUILD": build_file, "jeremy.jar": "hello dave"})

    lockfile_entry = CoursierLockfileEntry(
        coord=coord,
        file_name=file_name,
        direct_dependencies=Coordinates([]),
        dependencies=Coordinates([]),
        file_digest=expected_digest,
        pants_address="//:jeremy",
    )
    classpath_entry = rule_runner.request(ClasspathEntry, [lockfile_entry])
    assert classpath_entry.filenames == (file_name,)

    # The classpath entry must contain exactly the local jar's bytes.
    extracted = rule_runner.request(
        FileDigest,
        [ExtractFileDigest(classpath_entry.digest, file_name)],
    )
    assert extracted == expected_digest
# Example #9
async def coursier_fetch_one_coord(
    bash: BashBinary,
    coursier: Coursier,
    request: CoursierLockfileEntry,
) -> ResolvedClasspathEntry:
    """Run `coursier fetch --intransitive` to fetch a single artifact.

    This rule exists to permit efficient subsetting of a "global" classpath
    in the form of a lockfile.  Callers can determine what subset of dependencies
    from the lockfile are needed for a given target, then request those
    lockfile entries individually.

    By fetching only one entry at a time, we maximize our cache efficiency.  If instead
    we fetched the entire subset that the caller wanted, there would be a different cache
    key for every possible subset.

    This rule also guarantees exact reproducibility.  If all caches have been
    removed, `coursier fetch` will re-download the artifact, and this rule will
    confirm that what was downloaded matches exactly (by content digest) what
    was specified in the lockfile (what Coursier originally downloaded).
    """
    coursier_report_file_name = "coursier_report.json"
    # Invoke coursier via its wrapper script; `--intransitive` restricts the fetch
    # to exactly the requested coordinate (no transitive dependencies).
    process_result = await Get(
        ProcessResult,
        Process(
            argv=[
                bash.path,
                coursier.wrapper_script,
                coursier.coursier.exe,
                coursier_report_file_name,
                "--intransitive",
                request.coord.to_coord_str(),
            ],
            input_digest=coursier.digest,
            output_directories=("classpath",),
            output_files=(coursier_report_file_name,),
            description="Run coursier resolve",
            level=LogLevel.DEBUG,
        ),
    )
    # Isolate the JSON report from the process output and parse it.
    report_digest = await Get(
        Digest, DigestSubset(process_result.output_digest, PathGlobs([coursier_report_file_name]))
    )
    report_contents = await Get(DigestContents, Digest, report_digest)
    report = json.loads(report_contents[0].content)

    # An intransitive fetch of one coordinate must report exactly one dependency.
    report_deps = report["dependencies"]
    if len(report_deps) == 0:
        raise CoursierError("Coursier fetch report has no dependencies (i.e. nothing was fetched).")
    elif len(report_deps) > 1:
        raise CoursierError(
            "Coursier fetch report has multiple dependencies, but exactly 1 was expected."
        )

    dep = report_deps[0]

    # Guard against Coursier resolving to a different coordinate than requested
    # (e.g. via version substitution).
    resolved_coord = Coordinate.from_coord_str(dep["coord"])
    if resolved_coord != request.coord:
        raise CoursierError(
            f'Coursier resolved coord "{resolved_coord.to_coord_str()}" does not match requested coord "{request.coord.to_coord_str()}".'
        )

    # The wrapper script copies fetched artifacts under "classpath/" in the output.
    file_path = PurePath(dep["file"])
    classpath_dest = f"classpath/{file_path.name}"

    resolved_file_digest = await Get(
        Digest, DigestSubset(process_result.output_digest, PathGlobs([classpath_dest]))
    )
    # Strip the "classpath" prefix so the jar sits at the digest root.
    stripped_digest = await Get(Digest, RemovePrefix(resolved_file_digest, "classpath"))
    file_digest = await Get(
        FileDigest,
        ExtractFileDigest(stripped_digest, file_path.name),
    )
    # Reproducibility check: the downloaded bytes must match the lockfile's digest.
    if file_digest != request.file_digest:
        raise CoursierError(
            f"Coursier fetch for '{resolved_coord}' succeeded, but fetched artifact {file_digest} did not match the expected artifact: {request.file_digest}."
        )
    return ResolvedClasspathEntry(
        coord=request.coord, file_name=file_path.name, digest=stripped_digest
    )
# Example #10
async def coursier_resolve_lockfile(
    bash: BashBinary,
    coursier: Coursier,
    artifact_requirements: ArtifactRequirements,
) -> CoursierResolvedLockfile:
    """Run `coursier fetch ...` against a list of Maven coordinates and capture the result.

    This rule does two things in a single Process invocation:

        * Runs `coursier fetch` to let Coursier do the heavy lifting of resolving
          dependencies and downloading resolved artifacts (jars, etc).
        * Copies the resolved artifacts into the Process output directory, capturing
          the artifacts as content-addressed `Digest`s.

    It's important that this happens in the same process, since the process isn't
    guaranteed to run on the same machine as the rule, nor is a subsequent process
    invocation.  This guarantees that whatever Coursier resolved, it was fully
    captured into Pants' content addressed artifact storage.

    Note however that we still get the benefit of Coursier's "global" cache if it
    had already been run on the machine where the `coursier fetch` runs, so rerunning
    `coursier fetch` tends to be fast in practice.

    Finally, this rule bundles up the result into a `CoursierResolvedLockfile`.  This
    data structure encapsulates everything necessary to either materialize the
    resolved dependencies to a classpath for Java invocations, or to write the
    lockfile out to the workspace to hermetically freeze the result of the resolve.
    """

    # Nothing requested: return an empty lockfile without spawning a process.
    if len(artifact_requirements) == 0:
        return CoursierResolvedLockfile(entries=())

    coursier_report_file_name = "coursier_report.json"
    # Single transitive fetch of all requirements; the wrapper script writes the
    # JSON report and copies fetched jars under "classpath/".
    process_result = await Get(
        ProcessResult,
        Process(
            argv=[
                bash.path,
                coursier.wrapper_script,
                coursier.coursier.exe,
                coursier_report_file_name,
                *(req.to_coord_str() for req in artifact_requirements),
            ],
            input_digest=coursier.digest,
            output_directories=("classpath",),
            output_files=(coursier_report_file_name,),
            description=(
                "Running `coursier fetch` against "
                f"{pluralize(len(artifact_requirements), 'requirement')}: "
                f"{', '.join(req.to_coord_str() for req in artifact_requirements)}"
            ),
            level=LogLevel.DEBUG,
        ),
    )
    # Isolate the JSON report from the process output and parse it.
    report_digest = await Get(
        Digest, DigestSubset(process_result.output_digest, PathGlobs([coursier_report_file_name]))
    )
    report_contents = await Get(DigestContents, Digest, report_digest)
    report = json.loads(report_contents[0].content)

    # For each resolved dependency: subset its jar out of the output, strip the
    # "classpath" prefix, and compute its content digest, all concurrently.
    artifact_file_names = tuple(PurePath(dep["file"]).name for dep in report["dependencies"])
    artifact_output_paths = tuple(f"classpath/{file_name}" for file_name in artifact_file_names)
    artifact_digests = await MultiGet(
        Get(Digest, DigestSubset(process_result.output_digest, PathGlobs([output_path])))
        for output_path in artifact_output_paths
    )
    stripped_artifact_digests = await MultiGet(
        Get(Digest, RemovePrefix(artifact_digest, "classpath"))
        for artifact_digest in artifact_digests
    )
    artifact_file_digests = await MultiGet(
        Get(FileDigest, ExtractFileDigest(stripped_artifact_digest, file_name))
        for stripped_artifact_digest, file_name in zip(
            stripped_artifact_digests, artifact_file_names
        )
    )
    # Zip the report entries back together with their computed digests into
    # one lockfile entry per resolved coordinate.
    return CoursierResolvedLockfile(
        entries=tuple(
            CoursierLockfileEntry(
                coord=Coordinate.from_coord_str(dep["coord"]),
                direct_dependencies=Coordinates(
                    Coordinate.from_coord_str(dd) for dd in dep["directDependencies"]
                ),
                dependencies=Coordinates(Coordinate.from_coord_str(d) for d in dep["dependencies"]),
                file_name=file_name,
                file_digest=artifact_file_digest,
            )
            for dep, file_name, artifact_file_digest in zip(
                report["dependencies"], artifact_file_names, artifact_file_digests
            )
        )
    )
# Example #11
async def coursier_fetch_one_coord(
    request: CoursierLockfileEntry, ) -> ClasspathEntry:
    """Run `coursier fetch --intransitive` to fetch a single artifact.

    This rule exists to permit efficient subsetting of a "global" classpath
    in the form of a lockfile.  Callers can determine what subset of dependencies
    from the lockfile are needed for a given target, then request those
    lockfile entries individually.

    By fetching only one entry at a time, we maximize our cache efficiency.  If instead
    we fetched the entire subset that the caller wanted, there would be a different cache
    key for every possible subset.

    This rule also guarantees exact reproducibility.  If all caches have been
    removed, `coursier fetch` will re-download the artifact, and this rule will
    confirm that what was downloaded matches exactly (by content digest) what
    was specified in the lockfile (what Coursier originally downloaded).
    """

    # Prepare any URL- or JAR-specifying entries for use with Coursier
    req: ArtifactRequirement
    if request.pants_address:
        # Entry is backed by a local `jvm_artifact(..., jar=...)` target: resolve
        # the address to its target and use that target's jar source field.
        targets = await Get(
            Targets,
            UnparsedAddressInputs([request.pants_address],
                                  owning_address=None,
                                  description_of_origin="TODO(#14468)"),
        )
        req = ArtifactRequirement(request.coord,
                                  jar=targets[0][JvmArtifactJarSourceField])
    else:
        # Otherwise fetch remotely; remote_url may be None, in which case
        # Coursier resolves the coordinate from its configured repositories.
        req = ArtifactRequirement(request.coord, url=request.remote_url)

    coursier_resolve_info = await Get(
        CoursierResolveInfo,
        ArtifactRequirements([req]),
    )

    coursier_report_file_name = "coursier_report.json"

    # `--intransitive` restricts the fetch to exactly the requested coordinate
    # (no transitive dependencies).
    process_result = await Get(
        ProcessResult,
        CoursierFetchProcess(
            args=(
                coursier_report_file_name,
                "--intransitive",
                *coursier_resolve_info.argv,
            ),
            input_digest=coursier_resolve_info.digest,
            output_directories=("classpath", ),
            output_files=(coursier_report_file_name, ),
            description=
            f"Fetching with coursier: {request.coord.to_coord_str()}",
        ),
    )
    # Isolate the JSON report from the process output and parse it.
    report_digest = await Get(
        Digest,
        DigestSubset(process_result.output_digest,
                     PathGlobs([coursier_report_file_name])))
    report_contents = await Get(DigestContents, Digest, report_digest)
    report = json.loads(report_contents[0].content)

    # An intransitive fetch of one coordinate must report exactly one dependency.
    report_deps = report["dependencies"]
    if len(report_deps) == 0:
        raise CoursierError(
            "Coursier fetch report has no dependencies (i.e. nothing was fetched)."
        )
    elif len(report_deps) > 1:
        raise CoursierError(
            "Coursier fetch report has multiple dependencies, but exactly 1 was expected."
        )

    dep = report_deps[0]
    # Guard against Coursier resolving to a different coordinate than requested.
    resolved_coord = Coordinate.from_coord_str(dep["coord"])
    if resolved_coord != request.coord:
        raise CoursierError(
            f'Coursier resolved coord "{resolved_coord.to_coord_str()}" does not match requested coord "{request.coord.to_coord_str()}".'
        )

    # Fetched artifacts land under "classpath/" in the process output.
    classpath_dest_name = classpath_dest_filename(dep["coord"], dep["file"])
    classpath_dest = f"classpath/{classpath_dest_name}"

    resolved_file_digest = await Get(
        Digest,
        DigestSubset(process_result.output_digest,
                     PathGlobs([classpath_dest])))
    # Strip the "classpath" prefix so the jar sits at the digest root.
    stripped_digest = await Get(
        Digest, RemovePrefix(resolved_file_digest, "classpath"))
    file_digest = await Get(
        FileDigest,
        ExtractFileDigest(stripped_digest, classpath_dest_name),
    )
    # Reproducibility check: the downloaded bytes must match the lockfile's digest.
    if file_digest != request.file_digest:
        raise CoursierError(
            f"Coursier fetch for '{resolved_coord}' succeeded, but fetched artifact {file_digest} did not match the expected artifact: {request.file_digest}."
        )
    return ClasspathEntry(digest=stripped_digest,
                          filenames=(classpath_dest_name, ))