Example #1
0
async def strip_source_roots_from_snapshot(
    request: StripSnapshotRequest, ) -> SourceRootStrippedSources:
    """Removes source roots from a snapshot, e.g. `src/python/pants/util/strutil.py` ->
    `pants/util/strutil.py`."""
    if not request.snapshot.files:
        return SourceRootStrippedSources(request.snapshot, FrozenDict())

    if request.representative_path is not None:
        source_root_obj = await Get[SourceRoot](
            SourceRootRequest,
            SourceRootRequest.for_file(request.representative_path))
        source_root = source_root_obj.path
        if source_root == ".":
            return SourceRootStrippedSources.for_single_source_root(
                request.snapshot, source_root)
        resulting_snapshot = await Get[Snapshot](RemovePrefix(
            request.snapshot.digest, source_root))
        return SourceRootStrippedSources.for_single_source_root(
            resulting_snapshot, source_root)

    source_roots = await MultiGet(
        Get[SourceRoot](SourceRootRequest, SourceRootRequest.for_file(file))
        for file in request.snapshot.files)
    file_to_source_root = dict(zip(request.snapshot.files, source_roots))
    files_grouped_by_source_root = {
        source_root.path: tuple(files)
        for source_root, files in itertools.groupby(
            request.snapshot.files, key=file_to_source_root.__getitem__)
    }

    if len(files_grouped_by_source_root) == 1:
        source_root = next(iter(files_grouped_by_source_root.keys()))
        if source_root == ".":
            return SourceRootStrippedSources.for_single_source_root(
                request.snapshot, source_root)
        resulting_snapshot = await Get[Snapshot](RemovePrefix(
            request.snapshot.digest, source_root))
        return SourceRootStrippedSources.for_single_source_root(
            resulting_snapshot, source_root)

    snapshot_subsets = await MultiGet(
        Get[Snapshot](SnapshotSubset(request.snapshot.digest, PathGlobs(
            files))) for files in files_grouped_by_source_root.values())
    resulting_digests = await MultiGet(
        Get[Digest](RemovePrefix(snapshot.digest, source_root))
        for snapshot, source_root in zip(snapshot_subsets,
                                         files_grouped_by_source_root.keys()))

    resulting_snapshot = await Get[Snapshot](MergeDigests(resulting_digests))
    return SourceRootStrippedSources(
        resulting_snapshot,
        FrozenDict({
            source_root: tuple(file[len(source_root) + 1:] for file in files)
            for source_root, files in files_grouped_by_source_root.items()
        }),
    )
Example #2
0
async def strip_sources_paths(sources_paths: SourcesPaths) -> StrippedSourceFileNames:
    if not sources_paths.files:
        return StrippedSourceFileNames()
    source_root = await Get(
        SourceRoot, SourceRootRequest, SourceRootRequest.for_file(sources_paths.files[0])
    )
    if source_root.path == ".":
        return StrippedSourceFileNames(sources_paths.files)
    return StrippedSourceFileNames(fast_relpath(f, source_root.path) for f in sources_paths.files)
async def strip_file_name(request: StrippedFileNameRequest) -> StrippedFileName:
    source_root = await Get(
        SourceRoot, SourceRootRequest, SourceRootRequest.for_file(request.file_path)
    )
    return StrippedFileName(
        request.file_path
        if source_root.path == "."
        else fast_relpath(request.file_path, source_root.path)
    )
Example #4
0
async def resolve_pex_entry_point(request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    ep_val = request.entry_point_field.value
    if ep_val is None:
        return ResolvedPexEntryPoint(None, file_name_used=False)
    address = request.entry_point_field.address

    # We support several different schemes:
    #  1) `path.to.module` => preserve exactly.
    #  2) `path.to.module:func` => preserve exactly.
    #  3) `app.py` => convert into `path.to.app`.
    #  4) `app.py:func` => convert into `path.to.app:func`.

    # If it's already a module (cases #1 and #2), simply use that. Otherwise, convert the file name
    # into a module path (cases #3 and #4).
    if not ep_val.module.endswith(".py"):
        return ResolvedPexEntryPoint(ep_val, file_name_used=False)

    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, ep_val.module)
    entry_point_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=f"{address}'s `{request.entry_point_field.alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(entry_point_paths.files) != 1:
        raise InvalidFieldException(
            softwrap(
                f"""
                Multiple files matched for the `{request.entry_point_field.alias}`
                {ep_val.spec!r} for the target {address}, but only one file expected. Are you using
                a glob, rather than a file name?

                All matching files: {list(entry_point_paths.files)}.
                """
            )
        )
    entry_point_path = entry_point_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPexEntryPoint(
        dataclasses.replace(ep_val, module=normalized_path), file_name_used=True
    )
Example #5
0
async def resolve_python_aws_handler(
    request: ResolvePythonAwsHandlerRequest,
) -> ResolvedPythonAwsHandler:
    handler_val = request.field.value
    field_alias = request.field.alias
    address = request.field.address
    path, _, func = handler_val.partition(":")

    # If it's already a module, simply use that. Otherwise, convert the file name into a module
    # path.
    if not path.endswith(".py"):
        return ResolvedPythonAwsHandler(handler_val, file_name_used=False)

    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, path)
    handler_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=f"{address}'s `{field_alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(handler_paths.files) != 1:
        raise InvalidFieldException(
            softwrap(
                f"""
                Multiple files matched for the `{field_alias}` {repr(handler_val)} for the target
                {address}, but only one file expected.
                Are you using a glob, rather than a file name?

                All matching files: {list(handler_paths.files)}.
                """
            )
        )
    handler_path = handler_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(handler_path),
    )
    stripped_source_path = os.path.relpath(handler_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPythonAwsHandler(f"{normalized_path}:{func}", file_name_used=True)
Example #6
0
async def package_pex_binary(
    field_set: PexBinaryFieldSet,
    pex_binary_defaults: PexBinaryDefaults,
    global_options: GlobalOptions,
) -> BuiltPackage:
    entry_point = field_set.entry_point.value
    if entry_point is None:
        binary_source_paths = await Get(
            Paths, PathGlobs,
            field_set.sources.path_globs(FilesNotFoundBehavior.error))
        if len(binary_source_paths.files) != 1:
            raise InvalidFieldException(
                "No `entry_point` was set for the target "
                f"{repr(field_set.address)}, so it must have exactly one source, but it has "
                f"{len(binary_source_paths.files)}")
        entry_point_path = binary_source_paths.files[0]
        source_root = await Get(
            SourceRoot,
            SourceRootRequest,
            SourceRootRequest.for_file(entry_point_path),
        )
        entry_point = PexBinarySources.translate_source_file_to_entry_point(
            os.path.relpath(entry_point_path, source_root.path))

    output_filename = field_set.output_path.value_or_default(
        field_set.address,
        file_ending="pex",
        use_legacy_format=global_options.options.pants_distdir_legacy_paths,
    )
    two_step_pex = await Get(
        TwoStepPex,
        TwoStepPexFromTargetsRequest(
            PexFromTargetsRequest(
                addresses=[field_set.address],
                internal_only=False,
                entry_point=entry_point,
                platforms=PexPlatforms.create_from_platforms_field(
                    field_set.platforms),
                output_filename=output_filename,
                additional_args=field_set.generate_additional_args(
                    pex_binary_defaults),
            )),
    )
    return BuiltPackage(two_step_pex.pex.digest,
                        (BuiltPackageArtifact(output_filename), ))
Example #7
0
async def resolve_pex_entry_point(
        request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    if request.entry_point_field.value:
        return ResolvedPexEntryPoint(request.entry_point_field.value)
    binary_source_paths = await Get(
        Paths, PathGlobs,
        request.sources.path_globs(FilesNotFoundBehavior.error))
    if len(binary_source_paths.files) != 1:
        raise InvalidFieldException(
            "No `entry_point` was set for the target "
            f"{repr(request.sources.address)}, so it must have exactly one source, but it has "
            f"{len(binary_source_paths.files)}.")
    entry_point_path = binary_source_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    return ResolvedPexEntryPoint(module_base.replace(os.path.sep, "."))
Example #8
0
async def get_ancestor_init_py(targets: Targets) -> AncestorInitPyFiles:
    """Find any ancestor __init__.py files for the given targets.

    Includes sibling __init__.py files. Returns the files stripped of their source roots.
    """
    sources = await Get[SourceFiles](AllSourceFilesRequest(
        (tgt.get(Sources) for tgt in targets),
        for_sources_types=(PythonSources, ),
        enable_codegen=True,
    ))
    # Find the ancestors of all dirs containing .py files, including those dirs themselves.
    source_dir_ancestors: Set[Tuple[
        str, str]] = set()  # Items are (src_root, path incl. src_root).
    source_roots = await MultiGet(
        Get[SourceRoot](SourceRootRequest, SourceRootRequest.for_file(path))
        for path in sources.files)
    for path, source_root in zip(sources.files, source_roots):
        source_dir_ancestor = os.path.dirname(path)
        # Do not allow the repository root to leak (i.e., '.' should not be a package in setup.py).
        while source_dir_ancestor != source_root.path:
            source_dir_ancestors.add((source_root.path, source_dir_ancestor))
            source_dir_ancestor = os.path.dirname(source_dir_ancestor)

    source_dir_ancestors_list = list(
        source_dir_ancestors)  # To force a consistent order.

    # Note that we must MultiGet single globs instead of a a single Get for all the globs, because
    # we match each result to its originating glob (see use of zip below).
    ancestor_init_py_snapshots = await MultiGet(
        Get[Snapshot](
            PathGlobs,
            PathGlobs([os.path.join(source_dir_ancestor[1], "__init__.py")]))
        for source_dir_ancestor in source_dir_ancestors_list)

    source_root_stripped_ancestor_init_pys = await MultiGet(
        Get[Digest](RemovePrefix(snapshot.digest, source_dir_ancestor[0]))
        for snapshot, source_dir_ancestor in zip(ancestor_init_py_snapshots,
                                                 source_dir_ancestors_list))

    return AncestorInitPyFiles(source_root_stripped_ancestor_init_pys)
Example #9
0
async def prepare_python_sources(
        request: PythonSourceFilesRequest,
        union_membership: UnionMembership) -> PythonSourceFiles:
    sources = await Get(
        SourceFiles,
        SourceFilesRequest(
            (tgt.get(SourcesField) for tgt in request.targets),
            for_sources_types=request.valid_sources_types,
            enable_codegen=True,
        ),
    )

    missing_init_files = await Get(
        AncestorFiles,
        AncestorFilesRequest(input_files=sources.snapshot.files,
                             requested=("__init__.py", "__init__.pyi")),
    )
    init_injected = await Get(
        Snapshot,
        MergeDigests(
            (sources.snapshot.digest, missing_init_files.snapshot.digest)))

    # Codegen is able to generate code in any arbitrary location, unlike sources normally being
    # rooted under the target definition. To determine source roots for these generated files, we
    # cannot use the normal `SourceRootRequest.for_target()` and we instead must determine
    # a source root for every individual generated file. So, we re-resolve the codegen sources here.
    python_and_resources_targets = []
    codegen_targets = []
    for tgt in request.targets:
        if tgt.has_field(PythonSourceField) or tgt.has_field(
                ResourceSourceField):
            python_and_resources_targets.append(tgt)
        elif tgt.get(SourcesField).can_generate(
                PythonSourceField,
                union_membership) or tgt.get(SourcesField).can_generate(
                    ResourceSourceField, union_membership):
            codegen_targets.append(tgt)
    codegen_sources = await MultiGet(
        Get(
            HydratedSources,
            HydrateSourcesRequest(
                tgt.get(SourcesField),
                for_sources_types=request.valid_sources_types,
                enable_codegen=True,
            ),
        ) for tgt in codegen_targets)
    source_root_requests = [
        *(SourceRootRequest.for_target(tgt)
          for tgt in python_and_resources_targets),
        *(SourceRootRequest.for_file(f) for sources in codegen_sources
          for f in sources.snapshot.files),
    ]

    source_root_objs = await MultiGet(
        Get(SourceRoot, SourceRootRequest, req)
        for req in source_root_requests)
    source_root_paths = {
        source_root_obj.path
        for source_root_obj in source_root_objs
    }
    return PythonSourceFiles(
        SourceFiles(init_injected, sources.unrooted_files),
        tuple(sorted(source_root_paths)))
Example #10
0
async def resolve_pex_entry_point(
        request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    ep_val = request.entry_point_field.value
    ep_alias = request.entry_point_field.alias
    address = request.entry_point_field.address

    # TODO: factor up some of this code between python_awslambda and pex_binary once `sources` is
    #  removed.

    # This code is tricky, as we support several different schemes:
    #  1) `<none>` or `<None>` => set to `None`.
    #  2) `path.to.module` => preserve exactly.
    #  3) `path.to.module:func` => preserve exactly.
    #  4) `app.py` => convert into `path.to.app`.
    #  5) `app.py:func` => convert into `path.to.app:func`.

    if ep_val is None:
        instructions_url = docs_url(
            "python-package-goal#creating-a-pex-file-from-a-pex_binary-target")
        raise InvalidFieldException(
            f"The `{ep_alias}` field is not set for the target {address}. Run "
            f"`./pants help pex_binary` for more information on how to set the field or "
            f"see {instructions_url}.")

    # Case #1.
    if ep_val.module in ("<none>", "<None>"):
        return ResolvedPexEntryPoint(None)

    # If it's already a module (cases #2 and #3), simply use that. Otherwise, convert the file name
    # into a module path (cases #4 and #5).
    if not ep_val.module.endswith(".py"):
        return ResolvedPexEntryPoint(ep_val)

    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, ep_val.module)
    entry_point_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=
            f"{address}'s `{request.entry_point_field.alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(entry_point_paths.files) != 1:
        raise InvalidFieldException(
            f"Multiple files matched for the `{ep_alias}` {ep_val.spec!r} for the target "
            f"{address}, but only one file expected. Are you using a glob, rather than a file "
            f"name?\n\nAll matching files: {list(entry_point_paths.files)}.")
    entry_point_path = entry_point_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPexEntryPoint(
        dataclasses.replace(ep_val, module=normalized_path))
Example #11
0
async def resolve_pex_entry_point(
        request: ResolvePexEntryPointRequest) -> ResolvedPexEntryPoint:
    ep_val = request.entry_point_field.value
    ep_alias = request.entry_point_field.alias
    address = request.entry_point_field.address

    # TODO: factor up some of this code between python_awslambda and pex_binary once `sources` is
    #  removed.

    # This code is tricky, as we support several different schemes:
    #  1) `<none>` or `<None>` => set to `None`.
    #  2) `path.to.module` => preserve exactly.
    #  3) `path.to.module:func` => preserve exactly.
    #  4) `app.py` => convert into `path.to.app`.
    #  5) `app.py:func` => convert into `path.to.app:func`.
    #  6) `:func` => if there's a sources field, convert to `path.to.sources:func` (deprecated).
    #  7) no entry point field, but `sources` field => convert to `path.to.sources` (deprecated).

    # Handle deprecated cases #6 and #7, which are the only cases where the `sources` field matters
    # for calculating the entry point.
    if not ep_val or ep_val.startswith(":"):
        binary_source_paths = await Get(
            Paths, PathGlobs,
            request.sources.path_globs(FilesNotFoundBehavior.error))
        if len(binary_source_paths.files) != 1:
            instructions_url = docs_url(
                "python-package-goal#creating-a-pex-file-from-a-pex_binary-target"
            )
            if not ep_val:
                raise InvalidFieldException(
                    f"The `{ep_alias}` field is not set for the target {address}. Run "
                    f"`./pants help pex_binary` for more information on how to set the field or "
                    f"see {instructions_url}.")
            raise InvalidFieldException(
                f"The `{ep_alias}` field for the target {address} is set to the short-hand value "
                f"{repr(ep_val)}, but the `sources` field is not set. Pants requires the "
                "`sources` field to expand the entry point to the normalized form "
                f"`path.to.module:{ep_val}`. Please either set the `sources` field to exactly one "
                f"file (deprecated) or set `{ep_alias}='my_file.py:{ep_val}'`. See "
                f"{instructions_url}.")
        entry_point_path = binary_source_paths.files[0]
        source_root = await Get(
            SourceRoot,
            SourceRootRequest,
            SourceRootRequest.for_file(entry_point_path),
        )
        stripped_source_path = os.path.relpath(entry_point_path,
                                               source_root.path)
        module_base, _ = os.path.splitext(stripped_source_path)
        normalized_path = module_base.replace(os.path.sep, ".")
        return ResolvedPexEntryPoint(
            f"{normalized_path}{ep_val}" if ep_val else normalized_path)

    # Case #1.
    if ep_val in ("<none>", "<None>"):
        return ResolvedPexEntryPoint(None)

    path, _, func = ep_val.partition(":")

    # If it's already a module (cases #2 and #3), simply use that. Otherwise, convert the file name
    # into a module path (cases #4 and #5).
    if not path.endswith(".py"):
        return ResolvedPexEntryPoint(ep_val)

    # Use the engine to validate that the file exists and that it resolves to only one file.
    full_glob = os.path.join(address.spec_path, path)
    entry_point_paths = await Get(
        Paths,
        PathGlobs(
            [full_glob],
            glob_match_error_behavior=GlobMatchErrorBehavior.error,
            description_of_origin=
            f"{address}'s `{request.entry_point_field.alias}` field",
        ),
    )
    # We will have already raised if the glob did not match, i.e. if there were no files. But
    # we need to check if they used a file glob (`*` or `**`) that resolved to >1 file.
    if len(entry_point_paths.files) != 1:
        raise InvalidFieldException(
            f"Multiple files matched for the `{ep_alias}` {repr(ep_val)} for the target "
            f"{address}, but only one file expected. Are you using a glob, rather than a file "
            f"name?\n\nAll matching files: {list(entry_point_paths.files)}.")
    entry_point_path = entry_point_paths.files[0]
    source_root = await Get(
        SourceRoot,
        SourceRootRequest,
        SourceRootRequest.for_file(entry_point_path),
    )
    stripped_source_path = os.path.relpath(entry_point_path, source_root.path)
    module_base, _ = os.path.splitext(stripped_source_path)
    normalized_path = module_base.replace(os.path.sep, ".")
    return ResolvedPexEntryPoint(
        f"{normalized_path}:{func}" if func else normalized_path)
Example #12
0
async def create_python_binary_run_request(
    field_set: PythonBinaryFieldSet,
    python_binary_defaults: PythonBinaryDefaults,
    pex_env: PexEnvironment,
) -> RunRequest:
    entry_point = field_set.entry_point.value
    if entry_point is None:
        binary_source_paths = await Get(
            Paths, PathGlobs,
            field_set.sources.path_globs(FilesNotFoundBehavior.error))
        if len(binary_source_paths.files) != 1:
            raise InvalidFieldException(
                "No `entry_point` was set for the target "
                f"{repr(field_set.address)}, so it must have exactly one source, but it has "
                f"{len(binary_source_paths.files)}")
        entry_point_path = binary_source_paths.files[0]
        source_root = await Get(
            SourceRoot,
            SourceRootRequest,
            SourceRootRequest.for_file(entry_point_path),
        )
        entry_point = PythonBinarySources.translate_source_file_to_entry_point(
            os.path.relpath(entry_point_path, source_root.path))
    transitive_targets = await Get(TransitiveTargets,
                                   Addresses([field_set.address]))

    # Note that we get an intermediate PexRequest here (instead of going straight to a Pex)
    # so that we can get the interpreter constraints for use in runner_pex_request.
    requirements_pex_request = await Get(
        PexRequest,
        PexFromTargetsRequest,
        PexFromTargetsRequest.for_requirements([field_set.address],
                                               internal_only=True),
    )

    requirements_request = Get(Pex, PexRequest, requirements_pex_request)

    sources_request = Get(
        PythonSourceFiles,
        PythonSourceFilesRequest(transitive_targets.closure,
                                 include_files=True))

    output_filename = f"{field_set.address.target_name}.pex"
    runner_pex_request = Get(
        Pex,
        PexRequest(
            output_filename=output_filename,
            interpreter_constraints=requirements_pex_request.
            interpreter_constraints,
            additional_args=field_set.generate_additional_args(
                python_binary_defaults),
            internal_only=True,
            # Note that the entry point file is not in the Pex itself, but on the
            # PEX_PATH. This works fine!
            entry_point=entry_point,
        ),
    )

    requirements, sources, runner_pex = await MultiGet(requirements_request,
                                                       sources_request,
                                                       runner_pex_request)

    merged_digest = await Get(
        Digest,
        MergeDigests([
            requirements.digest, sources.source_files.snapshot.digest,
            runner_pex.digest
        ]),
    )

    def in_chroot(relpath: str) -> str:
        return os.path.join("{chroot}", relpath)

    args = pex_env.create_argv(in_chroot(runner_pex.name),
                               python=runner_pex.python)

    chrooted_source_roots = [in_chroot(sr) for sr in sources.source_roots]
    extra_env = {
        **pex_env.environment_dict(python_configured=runner_pex.python is not None),
        "PEX_PATH":
        in_chroot(requirements_pex_request.output_filename),
        "PEX_EXTRA_SYS_PATH":
        ":".join(chrooted_source_roots),
    }

    return RunRequest(digest=merged_digest, args=args, extra_env=extra_env)